[RFC PATCH v2 1/3] sched/fair: Co-locate cfs_rq and sched_entity

From: Zecheng Li
Date: Mon Jun 09 2025 - 15:39:00 EST


Improve data locality and reduce pointer chasing by allocating struct
cfs_rq and struct sched_entity together for non-root task groups. This
is achieved by introducing a new combined struct, cfs_rq_with_se, that
holds both objects in contiguous memory.

This patch:

- Defines the new struct cfs_rq_with_se.

- Modifies alloc_fair_sched_group() and free_fair_sched_group() to
allocate and free the new struct as a single unit.

- Modifies the per-CPU pointers in task_group->se and task_group->cfs_rq
to point to the members in the new combined structure.

Signed-off-by: Zecheng Li <zecheng@xxxxxxxxxx>
---
kernel/sched/fair.c | 23 ++++++++++-------------
kernel/sched/sched.h | 8 ++++++++
2 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0fb9bf995a47..cd090ceec633 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -13341,10 +13341,11 @@ void free_fair_sched_group(struct task_group *tg)
int i;

for_each_possible_cpu(i) {
- if (tg->cfs_rq)
- kfree(tg->cfs_rq[i]);
- if (tg->se)
- kfree(tg->se[i]);
+ if (tg->cfs_rq && tg->cfs_rq[i]) {
+ struct cfs_rq_with_se *combined =
+ container_of(tg->cfs_rq[i], struct cfs_rq_with_se, cfs_rq);
+ kfree(combined);
+ }
}

kfree(tg->cfs_rq);
@@ -13353,6 +13354,7 @@ void free_fair_sched_group(struct task_group *tg)

int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
{
+ struct cfs_rq_with_se *combined;
struct sched_entity *se;
struct cfs_rq *cfs_rq;
int i;
@@ -13369,16 +13371,13 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
init_cfs_bandwidth(tg_cfs_bandwidth(tg), tg_cfs_bandwidth(parent));

for_each_possible_cpu(i) {
- cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
+ combined = kzalloc_node(sizeof(struct cfs_rq_with_se),
GFP_KERNEL, cpu_to_node(i));
- if (!cfs_rq)
+ if (!combined)
goto err;

- se = kzalloc_node(sizeof(struct sched_entity_stats),
- GFP_KERNEL, cpu_to_node(i));
- if (!se)
- goto err_free_rq;
-
+ cfs_rq = &combined->cfs_rq;
+ se = &combined->se;
init_cfs_rq(cfs_rq);
init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]);
init_entity_runnable_average(se);
@@ -13386,8 +13385,6 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)

return 1;

-err_free_rq:
- kfree(cfs_rq);
err:
return 0;
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 47972f34ea70..af23917194fb 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -740,6 +740,14 @@ struct cfs_rq {
#endif /* CONFIG_FAIR_GROUP_SCHED */
};

+#ifdef CONFIG_FAIR_GROUP_SCHED
+struct cfs_rq_with_se {
+ struct cfs_rq cfs_rq; /* free_fair_sched_group() recovers the allocation from this member via container_of() */
+ /* cfs_rq's sched_entity on parent runqueue */
+ struct sched_entity se ____cacheline_aligned;
+};
+#endif /* CONFIG_FAIR_GROUP_SCHED */
+
#ifdef CONFIG_SCHED_CLASS_EXT
/* scx_rq->flags, protected by the rq lock */
enum scx_rq_flags {
--
2.50.0.rc0.604.gd4ff7b7c86-goog