[PATCH] sched/fair: fix pick_eevdf to always find the correct se

From: Benjamin Segall
Date: Fri Sep 29 2023 - 20:09:39 EST


The old pick_eevdf could fail to find the actual earliest eligible
deadline when it descended to the right looking for min_deadline, but it
turned out that that min_deadline wasn't actually eligible. In that case
we need to go back and search through any left branches we skipped
looking for the actual best _eligible_ min_deadline.

This is more expensive, but still O(log n), and at worst should only
involve descending two branches of the rbtree.

I've run this through a userspace stress test (thank you
tools/lib/rbtree.c), so hopefully this implementation doesn't miss any
corner cases.

Fixes: 147f3efaa241 ("sched/fair: Implement an EEVDF-like scheduling policy")
Signed-off-by: Ben Segall <bsegall@xxxxxxxxxx>
---
kernel/sched/fair.c | 72 ++++++++++++++++++++++++++++++++++++---------
1 file changed, 58 insertions(+), 14 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0c31cda0712f..77e9440b8ab3 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -864,18 +864,20 @@ struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
*
* se->min_deadline = min(se->deadline, se->{left,right}->min_deadline)
*
* Which allows an EDF like search on (sub)trees.
*/
-static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
+static struct sched_entity *__pick_eevdf(struct cfs_rq *cfs_rq)
{
struct rb_node *node = cfs_rq->tasks_timeline.rb_root.rb_node;
struct sched_entity *curr = cfs_rq->curr;
struct sched_entity *best = NULL;
+ struct sched_entity *best_left = NULL;

if (curr && (!curr->on_rq || !entity_eligible(cfs_rq, curr)))
curr = NULL;
+ best = curr;

/*
* Once selected, run a task until it either becomes non-eligible or
* until it gets a new slice. See the HACK in set_next_entity().
*/
@@ -892,45 +894,87 @@ static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
node = node->rb_left;
continue;
}

/*
- * If this entity has an earlier deadline than the previous
- * best, take this one. If it also has the earliest deadline
- * of its subtree, we're done.
+ * Now we heap search eligible trees for the best (min_)deadline
*/
- if (!best || deadline_gt(deadline, best, se)) {
+ if (!best || deadline_gt(deadline, best, se))
best = se;
- if (best->deadline == best->min_deadline)
- break;
- }

/*
- * If the earlest deadline in this subtree is in the fully
- * eligible left half of our space, go there.
+ * Every se in a left branch is eligible, keep track of the
+ * branch with the best min_deadline
*/
+ if (node->rb_left) {
+ struct sched_entity *left = __node_2_se(node->rb_left);
+
+ if (!best_left || deadline_gt(min_deadline, best_left, left))
+ best_left = left;
+
+ /*
+ * min_deadline is in the left branch. rb_left and all
+ * descendants are eligible, so immediately switch to the second
+ * loop.
+ */
+ if (left->min_deadline == se->min_deadline)
+ break;
+ }
+
+ /* min_deadline is at this node, no need to look right */
+ if (se->deadline == se->min_deadline)
+ break;
+
+ /* else min_deadline is in the right branch. */
+ node = node->rb_right;
+ }
+
+ /*
+ * We ran into an eligible node which is itself the best.
+ * (Or nr_running == 0 and both are NULL)
+ */
+ if (!best_left || (s64)(best_left->min_deadline - best->deadline) > 0)
+ return best;
+
+ /*
+ * Now best_left and all of its children are eligible, and we are just
+ * looking for deadline == min_deadline
+ */
+ node = &best_left->run_node;
+ while (node) {
+ struct sched_entity *se = __node_2_se(node);
+
+ /* min_deadline is the current node */
+ if (se->deadline == se->min_deadline)
+ return se;
+
+ /* min_deadline is in the left branch */
if (node->rb_left &&
__node_2_se(node->rb_left)->min_deadline == se->min_deadline) {
node = node->rb_left;
continue;
}

+ /* else min_deadline is in the right branch */
node = node->rb_right;
}
+ return NULL;
+}

- if (!best || (curr && deadline_gt(deadline, best, curr)))
- best = curr;
+static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
+{
+ struct sched_entity *se = __pick_eevdf(cfs_rq);

- if (unlikely(!best)) {
+ if (!se) {
struct sched_entity *left = __pick_first_entity(cfs_rq);
if (left) {
pr_err("EEVDF scheduling fail, picking leftmost\n");
return left;
}
}

- return best;
+ return se;
}

#ifdef CONFIG_SCHED_DEBUG
struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
{
--
2.42.0.582.g8ccd20d70d-goog