[RFC PATCH v2 3/3] sched/fair: Reorder struct sched_entity
From: Zecheng Li
Date: Mon Jun 02 2025 - 14:06:45 EST
Group the mostly-read fields in struct sched_entity at the head of the
struct when `CONFIG_FAIR_GROUP_SCHED` is set. The additional fields from
`CONFIG_FAIR_GROUP_SCHED` are related to CFS cgroup scheduling and were
placed far away from the hot fields `load`, `on_rq` and `vruntime`. They
are moved together to the head of the struct to exploit locality.
Although `depth` is not as hot as other fields, we keep it here to avoid
breaking the #ifdef boundaries. Adds enforced alignment of struct
sched_entity to ensure the cache group works as intended.
Also add a compile-time check when `CONFIG_FAIR_GROUP_SCHED` is set to
check the placement of the hot fields.
Signed-off-by: Zecheng Li <zecheng@xxxxxxxxxx>
---
include/linux/sched.h | 39 +++++++++++++++++++++------------------
kernel/sched/core.c | 20 ++++++++++++++++++++
2 files changed, 41 insertions(+), 18 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f96ac1982893..b20b2d590cf6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -567,40 +567,43 @@ struct sched_statistics {
} ____cacheline_aligned;
struct sched_entity {
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	/* Group the read-mostly hot fields of sched_entity */
+ __cacheline_group_begin(hot);
+ struct sched_entity *parent;
+ /* rq on which this entity is (to be) queued: */
+ struct cfs_rq *cfs_rq;
+ /* rq "owned" by this entity/group: */
+ struct cfs_rq *my_q;
+ /* cached value of my_q->h_nr_running */
+ unsigned long runnable_weight;
+ int depth;
+#endif
+ unsigned char on_rq;
+ unsigned char sched_delayed;
+ unsigned char rel_deadline;
+ unsigned char custom_slice;
/* For load-balancing: */
struct load_weight load;
+ u64 vruntime;
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ __cacheline_group_end(hot);
+#endif
struct rb_node run_node;
u64 deadline;
u64 min_vruntime;
u64 min_slice;
struct list_head group_node;
- unsigned char on_rq;
- unsigned char sched_delayed;
- unsigned char rel_deadline;
- unsigned char custom_slice;
- /* hole */
u64 exec_start;
u64 sum_exec_runtime;
u64 prev_sum_exec_runtime;
- u64 vruntime;
s64 vlag;
u64 slice;
u64 nr_migrations;
-#ifdef CONFIG_FAIR_GROUP_SCHED
- int depth;
- struct sched_entity *parent;
- /* rq on which this entity is (to be) queued: */
- struct cfs_rq *cfs_rq;
- /* rq "owned" by this entity/group: */
- struct cfs_rq *my_q;
- /* cached value of my_q->h_nr_running */
- unsigned long runnable_weight;
-#endif
-
#ifdef CONFIG_SMP
/*
* Per entity load average tracking.
@@ -610,7 +613,7 @@ struct sched_entity {
*/
struct sched_avg avg;
#endif
-};
+} ____cacheline_aligned;
struct sched_rt_entity {
struct list_head run_list;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ba89cd4f2fac..dcc50df9e8ca 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8525,6 +8525,7 @@ static struct kmem_cache *task_group_cache __ro_after_init;
#endif
static void __init cfs_rq_struct_check(void);
+static void __init sched_entity_struct_check(void);
void __init sched_init(void)
{
@@ -8543,6 +8544,7 @@ void __init sched_init(void)
BUG_ON(!sched_class_above(&ext_sched_class, &idle_sched_class));
#endif
cfs_rq_struct_check();
+ sched_entity_struct_check();
wait_bit_init();
#ifdef CONFIG_FAIR_GROUP_SCHED
@@ -10805,3 +10807,21 @@ static void __init cfs_rq_struct_check(void)
#endif
#endif
}
+
+static void __init sched_entity_struct_check(void)
+{
+	/*
+	 * The compile-time check is only enabled with CONFIG_FAIR_GROUP_SCHED.
+	 * We care about the placement of the seven hot fields asserted below.
+	 */
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sched_entity, hot, parent);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sched_entity, hot, cfs_rq);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sched_entity, hot, my_q);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sched_entity, hot,
+ runnable_weight);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sched_entity, hot, on_rq);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sched_entity, hot, load);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sched_entity, hot, vruntime);
+#endif
+}
--
2.49.0