[PATCH v2 03/12] sched: Create preempt_count invariant

From: Peter Zijlstra
Date: Wed Sep 30 2015 - 03:31:32 EST


The preempt_count() numbers below are expressed in units of
PREEMPT_DISABLE_OFFSET.

Now that TASK_DEAD no longer results in preempt_count() == 3 during
scheduling, we will always call context_switch() with preempt_count()
== 2.

However, we don't always end up with preempt_count() == 2 in
finish_task_switch() because new tasks get created with
preempt_count() == 1.

Create FORK_PREEMPT_COUNT, set it to 2, and use it in the right
places. Note that we cannot use INIT_PREEMPT_COUNT, as that serves
another purpose (boot).

After this, preempt_count() is invariant across the context switch,
with the exception of PREEMPT_ACTIVE.
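
For illustration only (not part of the patch), the bookkeeping can be
modelled in userspace; a minimal sketch, assuming CONFIG_PREEMPT_COUNT=y
with PREEMPT_DISABLE_OFFSET == 1 and PREEMPT_ENABLED == 0:

#include <assert.h>
#include <stdio.h>

#define PREEMPT_DISABLE_OFFSET  1
#define FORK_PREEMPT_COUNT      (2 * PREEMPT_DISABLE_OFFSET)

static int preempt_count;       /* per-task in the real kernel */

static void finish_task_switch(void)
{
        /* the invariant: every path in here holds exactly two levels */
        assert(preempt_count == 2 * PREEMPT_DISABLE_OFFSET);
        preempt_count -= PREEMPT_DISABLE_OFFSET;  /* drop rq->lock */
}

static void schedule(void)
{
        preempt_count += PREEMPT_DISABLE_OFFSET;  /* preempt_disable() */
        preempt_count += PREEMPT_DISABLE_OFFSET;  /* raw_spin_lock_irq(&rq->lock) */
        finish_task_switch();                     /* runs on the other side of the switch */
        preempt_count -= PREEMPT_DISABLE_OFFSET;  /* schedule()'s matching enable */
}

static void schedule_tail(void)
{
        preempt_count = FORK_PREEMPT_COUNT;       /* set at fork, see init_task_preempt_count() */
        finish_task_switch();
        preempt_count -= PREEMPT_DISABLE_OFFSET;  /* preempt_enable() */
}

int main(void)
{
        schedule();
        schedule_tail();
        assert(preempt_count == 0);
        printf("invariant holds\n");
        return 0;
}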

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
arch/x86/include/asm/preempt.h | 2 +-
include/asm-generic/preempt.h | 2 +-
include/linux/sched.h | 17 ++++++++++++-----
kernel/sched/core.c | 23 +++++++++++++++++++++--
4 files changed, 35 insertions(+), 9 deletions(-)

--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -31,7 +31,7 @@ static __always_inline void preempt_coun
* must be macros to avoid header recursion hell
*/
#define init_task_preempt_count(p) do { \
- task_thread_info(p)->saved_preempt_count = PREEMPT_DISABLED; \
+ task_thread_info(p)->saved_preempt_count = FORK_PREEMPT_COUNT; \
} while (0)

#define init_idle_preempt_count(p, cpu) do { \
--- a/include/asm-generic/preempt.h
+++ b/include/asm-generic/preempt.h
@@ -24,7 +24,7 @@ static __always_inline void preempt_coun
* must be macros to avoid header recursion hell
*/
#define init_task_preempt_count(p) do { \
- task_thread_info(p)->preempt_count = PREEMPT_DISABLED; \
+ task_thread_info(p)->preempt_count = FORK_PREEMPT_COUNT; \
} while (0)

#define init_idle_preempt_count(p, cpu) do { \
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -599,11 +599,7 @@ struct task_cputime_atomic {
.sum_exec_runtime = ATOMIC64_INIT(0), \
}

-#ifdef CONFIG_PREEMPT_COUNT
-#define PREEMPT_DISABLED (1 + PREEMPT_ENABLED)
-#else
-#define PREEMPT_DISABLED PREEMPT_ENABLED
-#endif
+#define PREEMPT_DISABLED (PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED)

/*
* Disable preemption until the scheduler is running -- use an unconditional
@@ -613,6 +609,17 @@ struct task_cputime_atomic {
*/
#define INIT_PREEMPT_COUNT PREEMPT_OFFSET

+/*
+ * Initial preempt_count value; reflects the preempt_count schedule invariant
+ * which states that during context switches:
+ *
+ * preempt_count() == 2*PREEMPT_DISABLE_OFFSET
+ *
+ * Note: PREEMPT_DISABLE_OFFSET is 0 for !PREEMPT_COUNT kernels.
+ * Note: See finish_task_switch().
+ */
+#define FORK_PREEMPT_COUNT (2*PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED)
+
/**
* struct thread_group_cputimer - thread group interval timer counts
* @cputime_atomic: atomic thread group interval timers.
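
To make the arithmetic concrete (a sketch, not part of the patch; it
assumes the asm-generic PREEMPT_ENABLED == 0 and PREEMPT_OFFSET == 1,
while architectures may define PREEMPT_ENABLED differently without
changing the arithmetic in units of PREEMPT_DISABLE_OFFSET):

#define PREEMPT_ENABLED         0
#ifdef CONFIG_PREEMPT_COUNT
# define PREEMPT_DISABLE_OFFSET 1       /* == PREEMPT_OFFSET */
#else
# define PREEMPT_DISABLE_OFFSET 0       /* count not maintained */
#endif

#define PREEMPT_DISABLED        (PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED)
#define FORK_PREEMPT_COUNT      (2*PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED)

/*
 * CONFIG_PREEMPT_COUNT=y: PREEMPT_DISABLED == 1, FORK_PREEMPT_COUNT == 2
 * CONFIG_PREEMPT_COUNT=n: PREEMPT_DISABLED == 0, FORK_PREEMPT_COUNT == 0
 */
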
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2504,6 +2504,18 @@ static struct rq *finish_task_switch(str
struct mm_struct *mm = rq->prev_mm;
long prev_state;

+ /*
+ * The previous task will have left us with a preempt_count of 2
+ * because it left us after:
+ *
+ * schedule()
+ * preempt_disable(); // 1
+ * __schedule()
+ * raw_spin_lock_irq(&rq->lock) // 2
+ *
+ * Also, see FORK_PREEMPT_COUNT.
+ */
+
rq->prev_mm = NULL;

/*
@@ -2588,8 +2600,15 @@ asmlinkage __visible void schedule_tail(
{
struct rq *rq;

- /* finish_task_switch() drops rq->lock and enables preemtion */
- preempt_disable();
+ /*
+ * New tasks start with FORK_PREEMPT_COUNT, see there and
+ * finish_task_switch() for details.
+ *
+ * finish_task_switch() will drop rq->lock() and lower preempt_count
+ * and the preempt_enable() will end up enabling preemption (on
+ * PREEMPT_COUNT kernels).
+ */
+
rq = finish_task_switch(prev);
balance_callback(rq);
preempt_enable();
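
With the count now invariant on entry to finish_task_switch(), it is
cheap to verify at runtime. A possible debug check (sketch only, not
part of this patch) at the top of finish_task_switch() could look like:

        if (WARN_ONCE(preempt_count() != 2*PREEMPT_DISABLE_OFFSET,
                      "corrupted preempt_count: %s/%d/0x%x\n",
                      current->comm, current->pid, preempt_count()))
                preempt_count_set(FORK_PREEMPT_COUNT);

Resetting to FORK_PREEMPT_COUNT on a mismatch keeps the machine going
so the warning can actually be reported.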

