[RFC PATCH v2] sched/fair: search a task from the tail of the queue

From: Uladzislau Rezki (Sony)
Date: Wed Sep 13 2017 - 06:25:05 EST


From: Uladzislau Rezki <urezki@xxxxxxxxx>

As a first step this patch makes cfs_tasks list as MRU one.
It means, that when a next task is picked to run on physical
CPU it is moved to the front of the list.

Therefore, the cfs_tasks list is more or less sorted (except
woken tasks) starting from recently given CPU time tasks toward
tasks with max wait time in a run-queue, i.e. MRU list.

Second, as part of the load balance operation, this approach
starts detach_tasks()/detach_one_task() from the tail of the
queue instead of the head, giving some advantages:

- tends to pick a task with highest wait time;
- tasks located in the tail are less likely cache-hot,
therefore the can_migrate_task() decision is higher.

hackbench illustrates slightly better performance. For example
doing 1000 samples and 40 groups on i5-3320M CPU, it shows below
figures:

default: 0.657 avg
patched: 0.646 avg

Signed-off-by: Uladzislau Rezki (Sony) <urezki@xxxxxxxxx>
---
kernel/sched/fair.c | 24 ++++++++++++++++--------
1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c77e4b1d51c0..b598907e5236 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6354,10 +6354,7 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf
set_next_entity(cfs_rq, se);
}

- if (hrtick_enabled(rq))
- hrtick_start_fair(rq, p);
-
- return p;
+ goto done;
simple:
cfs_rq = &rq->cfs;
#endif
@@ -6375,6 +6372,16 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf

p = task_of(se);

+done: __maybe_unused
+#ifdef CONFIG_SMP
+ /*
+ * Move the next running task to the front of
+ * the list, so our cfs_tasks list becomes MRU
+ * one.
+ */
+ list_move(&p->se.group_node, &rq->cfs_tasks);
+#endif
+
if (hrtick_enabled(rq))
hrtick_start_fair(rq, p);

@@ -6806,11 +6813,12 @@ static void detach_task(struct task_struct *p, struct lb_env *env)
*/
static struct task_struct *detach_one_task(struct lb_env *env)
{
- struct task_struct *p, *n;
+ struct task_struct *p;

lockdep_assert_held(&env->src_rq->lock);

- list_for_each_entry_safe(p, n, &env->src_rq->cfs_tasks, se.group_node) {
+ list_for_each_entry_reverse(p,
+ &env->src_rq->cfs_tasks, se.group_node) {
if (!can_migrate_task(p, env))
continue;

@@ -6856,7 +6864,7 @@ static int detach_tasks(struct lb_env *env)
if (env->idle != CPU_NOT_IDLE && env->src_rq->nr_running <= 1)
break;

- p = list_first_entry(tasks, struct task_struct, se.group_node);
+ p = list_last_entry(tasks, struct task_struct, se.group_node);

env->loop++;
/* We've more or less seen every task there is, call it quits */
@@ -6906,7 +6914,7 @@ static int detach_tasks(struct lb_env *env)

continue;
next:
- list_move_tail(&p->se.group_node, tasks);
+ list_move(&p->se.group_node, tasks);
}

/*
--
2.11.0