[RFC PATCH 3/6] sched/dl: Try better placement even for deadline tasks that do not block

From: Luca Abeni
Date: Mon May 06 2019 - 01:49:54 EST


From: luca abeni <luca.abeni@xxxxxxxxxxxxxxx>

Currently, the scheduler tries to find a proper placement for
SCHED_DEADLINE tasks when they are pushed out of a core or when
they wake up. Hence, if there is a single SCHED_DEADLINE task
that never blocks and wakes up, such a task is never migrated to
an appropriate CPU core, but continues to execute on its original
core.

This commit addresses the issue by trying to migrate a SCHED_DEADLINE
task (searching for an appropriate CPU core) the first time it is
throttled.

Signed-off-by: luca abeni <luca.abeni@xxxxxxxxxxxxxxx>
---
include/linux/sched.h | 1 +
kernel/sched/deadline.c | 53 ++++++++++++++++++++++++++++++++++++-----
kernel/sched/sched.h | 2 ++
3 files changed, 50 insertions(+), 6 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 863f70843875..5e322c8a94e0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -560,6 +560,7 @@ struct sched_dl_entity {
unsigned int dl_yielded : 1;
unsigned int dl_non_contending : 1;
unsigned int dl_overrun : 1;
+ unsigned int dl_adjust : 1;

/*
* Bandwidth enforcement timer. Each -deadline task has its
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 3436f3d8fa8f..db471889196b 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -515,6 +515,7 @@ static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev)
return dl_task(prev);
}

+static DEFINE_PER_CPU(struct callback_head, dl_migrate_head);
static DEFINE_PER_CPU(struct callback_head, dl_push_head);
static DEFINE_PER_CPU(struct callback_head, dl_pull_head);

@@ -1149,6 +1150,32 @@ static u64 grub_reclaim(u64 delta, struct rq *rq, struct sched_dl_entity *dl_se)
return (delta * u_act) >> BW_SHIFT;
}

+#ifdef CONFIG_SMP
+static int find_later_rq(struct task_struct *task);
+
+static void migrate_dl_task(struct rq *rq)
+{
+ struct task_struct *t = rq->migrating_task;
+ struct sched_dl_entity *dl_se = &t->dl;
+ int cpu = find_later_rq(t);
+
+ if ((cpu != -1) && (cpu != rq->cpu)) {
+ struct rq *later_rq;
+
+ later_rq = cpu_rq(cpu);
+
+ double_lock_balance(rq, later_rq);
+ sub_running_bw(&t->dl, &rq->dl);
+ sub_rq_bw(&t->dl, &rq->dl);
+ set_task_cpu(t, later_rq->cpu);
+ add_rq_bw(&t->dl, &later_rq->dl);
+ add_running_bw(&t->dl, &later_rq->dl);
+ double_unlock_balance(rq, later_rq);
+ }
+ rq->migrating_task = NULL;
+ dl_se->dl_adjust = 0;
+}
+#endif
/*
* Update the current task's runtime statistics (provided it is still
* a -deadline task and has not been removed from the dl_rq).
@@ -1223,8 +1250,17 @@ static void update_curr_dl(struct rq *rq)
dl_se->dl_overrun = 1;

__dequeue_task_dl(rq, curr, 0);
- if (unlikely(dl_se->dl_boosted || !start_dl_timer(curr)))
+ if (unlikely(dl_se->dl_boosted || !start_dl_timer(curr))) {
enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
+#ifdef CONFIG_SMP
+ } else if (dl_se->dl_adjust) {
+ if (rq->migrating_task == NULL) {
+ queue_balance_callback(rq, &per_cpu(dl_migrate_head, rq->cpu), migrate_dl_task);
+ rq->migrating_task = current;
+ } else
+ printk_deferred("Throttled task before migratin g the previous one???\n");
+#endif
+ }

if (!is_leftmost(curr, &rq->dl))
resched_curr(rq);
@@ -1573,13 +1609,12 @@ static void yield_task_dl(struct rq *rq)

#ifdef CONFIG_SMP

-static int find_later_rq(struct task_struct *task);
-
static int
select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)
{
struct task_struct *curr;
struct rq *rq;
+ bool het;

if (sd_flag != SD_BALANCE_WAKE)
goto out;
@@ -1591,6 +1626,7 @@ select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)

rcu_read_lock();
curr = READ_ONCE(rq->curr); /* unlocked access */
+ het = static_branch_unlikely(&sched_asym_cpucapacity);

/*
* If we are dealing with a -deadline task, we must
@@ -1604,15 +1640,17 @@ select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)
if ((unlikely(dl_task(curr)) &&
(curr->nr_cpus_allowed < 2 ||
!dl_entity_preempt(&p->dl, &curr->dl)) &&
- (p->nr_cpus_allowed > 1)) ||
- static_branch_unlikely(&sched_asym_cpucapacity)) {
+ (p->nr_cpus_allowed > 1)) || het) {
int target = find_later_rq(p);

if (target != -1 &&
(dl_time_before(p->dl.deadline,
cpu_rq(target)->dl.earliest_dl.curr) ||
- (cpu_rq(target)->dl.dl_nr_running == 0)))
+ (cpu_rq(target)->dl.dl_nr_running == 0))) {
+ if (het && (target != cpu))
+ p->dl.dl_adjust = 1;
cpu = target;
+ }
}
rcu_read_unlock();

@@ -2369,6 +2407,9 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
else
resched_curr(rq);
}
+
+ if (static_branch_unlikely(&sched_asym_cpucapacity))
+ p->dl.dl_adjust = 1;
}

/*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e5f9fd3aee80..1a8f75338ac2 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -963,6 +963,8 @@ struct rq {

/* This is used to determine avg_idle's max value */
u64 max_idle_balance_cost;
+
+ struct task_struct *migrating_task;
#endif

#ifdef CONFIG_IRQ_TIME_ACCOUNTING
--
2.20.1