Re: [PATCH v1 1/1] sched/fair: do not preempt current task if it is going to call schedule()

From: Kees Cook
Date: Thu Mar 05 2020 - 03:43:46 EST


On Thu, Mar 05, 2020 at 04:16:11PM +0800, cl@xxxxxxxxxxxxxx wrote:
> From: Liang Chen <cl@xxxxxxxxxxxxxx>
>
> when we create a kthread with ktrhead_create_on_cpu(),the child thread
> entry is ktread.c:ktrhead() which will be preempted by the parent after
> call complete(done) while schedule() is not called yet,then the parent
> will call wait_task_inactive(child) but the child is still on the runqueue,
> so the parent will schedule_hrtimeout() for 1 jiffy,it will waste a lot of
> time,especially on startup.
>
> parent child
> ktrhead_create_on_cpu()
> wait_fo_completion(&done) -----> ktread.c:ktrhead()
> |----- complete(done);--wakeup and preempted by parent
> kthread_bind() <------------| |-> schedule();--dequeue here
> wait_task_inactive(child) |
> schedule_hrtimeout(1 jiffy) -|
>
> So we hope the child just wakeup parent but not preempted by parent, and the
> child is going to call schedule() soon,then the parent will not call
> schedule_hrtimeout(1 jiffy) as the child is already dequeue.
>
> The same issue for ktrhead_park()&&kthread_parkme().
> This patch can save 120ms on rk312x startup with CONFIG_HZ=300.

Interesting improvement!

>
> Signed-off-by: Liang Chen <cl@xxxxxxxxxxxxxx>
> ---
> arch/arm/include/asm/thread_info.h | 1 +
> arch/arm64/include/asm/thread_info.h | 1 +
> include/linux/sched.h | 15 +++++++++++++++
> kernel/kthread.c | 4 ++++
> kernel/sched/fair.c | 4 ++++
> 5 files changed, 25 insertions(+)
>
> diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
> index 0d0d5178e2c3..51802991ba1f 100644
> --- a/arch/arm/include/asm/thread_info.h
> +++ b/arch/arm/include/asm/thread_info.h
> @@ -145,6 +145,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *,
> #define TIF_USING_IWMMXT 17
> #define TIF_MEMDIE 18 /* is terminating due to OOM killer */
> #define TIF_RESTORE_SIGMASK 20
> +#define TIF_GOING_TO_SCHED 27 /* task is going to call schedule() */
>
> #define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
> #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
> diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
> index f0cec4160136..332786f11dc3 100644
> --- a/arch/arm64/include/asm/thread_info.h
> +++ b/arch/arm64/include/asm/thread_info.h
> @@ -78,6 +78,7 @@ void arch_release_task_struct(struct task_struct *tsk);
> #define TIF_SVE_VL_INHERIT 24 /* Inherit sve_vl_onexec across exec */
> #define TIF_SSBD 25 /* Wants SSB mitigation */
> #define TIF_TAGGED_ADDR 26 /* Allow tagged user addresses */
> +#define TIF_GOING_TO_SCHED 27 /* task is going to call schedule() */
>
> #define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
> #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)

I don't think you want a TIF flag for this (they're used in special
places, especially in entry code, etc). Since you're only changing
"normal" C code, I would suggest a atomic_flags addition instead:

#define PFA_GOING_TO_SCHED 8
...
TASK_PFA_TEST(GOING_TO_SCHED, going_to_sched)
TASK_PFA_SET(GOING_TO_SCHED, going_to_sched)
TASK_PFA_CLEAR(GOING_TO_SCHED, going_to_sched)

(Also if you used TIF, you'd need to add the TIF to every architecture
to use it in the common scheduler code, which too much work.)

> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 04278493bf15..cb9058d2cf0b 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1768,6 +1768,21 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
> return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
> }
>
> +static inline void set_tsk_going_to_sched(struct task_struct *tsk)
> +{
> + set_tsk_thread_flag(tsk, TIF_GOING_TO_SCHED);
> +}
> +
> +static inline void clear_tsk_going_to_sched(struct task_struct *tsk)
> +{
> + clear_tsk_thread_flag(tsk, TIF_GOING_TO_SCHED);
> +}
> +
> +static inline int test_tsk_going_to_sched(struct task_struct *tsk)
> +{
> + return unlikely(test_tsk_thread_flag(tsk, TIF_GOING_TO_SCHED));
> +}

Then you can drop these wrappers since you'll have the test/set/clear
functions declared above (task_set/clear_going_to_sched(),
task_going_to_sched()) with the TASK_PFA... macros.

> +
> /*
> * cond_resched() and cond_resched_lock(): latency reduction via
> * explicit rescheduling in places that are safe. The return
> diff --git a/kernel/kthread.c b/kernel/kthread.c
> index b262f47046ca..8a4e4c9cdc22 100644
> --- a/kernel/kthread.c
> +++ b/kernel/kthread.c
> @@ -199,8 +199,10 @@ static void __kthread_parkme(struct kthread *self)
> if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags))
> break;
>
> + set_tsk_going_to_sched(current);

task_set_going_to_sched(current);

> complete(&self->parked);
> schedule();
> + clear_tsk_going_to_sched(current);

task_clear_going_to_sched(current);

> }
> __set_current_state(TASK_RUNNING);
> }
> @@ -245,8 +247,10 @@ static int kthread(void *_create)
> /* OK, tell user we're spawned, wait for stop or wakeup */
> __set_current_state(TASK_UNINTERRUPTIBLE);
> create->result = current;
> + set_tsk_going_to_sched(current);
> complete(done);
> schedule();
> + clear_tsk_going_to_sched(current);

etc.

>
> ret = -EINTR;
> if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) {
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 3c8a379c357e..28a308743bf8 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -4330,6 +4330,8 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
> hrtimer_active(&rq_of(cfs_rq)->hrtick_timer))
> return;
> #endif
> + if (test_tsk_going_to_sched(rq_of(cfs_rq)->curr))
> + return;

if (unlikely(task_going_to_sched(rq_of(cfs_rq)->curr)))
return;

>
> if (cfs_rq->nr_running > 1)
> check_preempt_tick(cfs_rq, curr);
> @@ -6633,6 +6635,8 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
> */
> if (test_tsk_need_resched(curr))
> return;
> + if (test_tsk_going_to_sched(curr))
> + return;

same.

>
> /* Idle tasks are by definition preempted by non-idle tasks. */
> if (unlikely(task_has_idle_policy(curr)) &&
> --
> 2.17.1
>
>
>

I'd add comments above each of the "return" cases to help people
understand why the test is important.

--
Kees Cook