Re: [RFC patch v3 08/20] sched: Set up LLC indexing

From: Shrikanth Hegde
Date: Thu Jul 03 2025 - 15:44:48 EST




On 6/18/25 23:57, Tim Chen wrote:
Prepare for indexing arrays that track in each run queue: the number
of tasks preferring current LLC and each of the other LLC.

The reason to introduce LLC index is because the per LLC-scope data
is needed to do cache aware load balancing. However, the native lld_id
is usually the first CPU of that LLC domain, which is not continuous,
which might waste the space if the per LLC-scope data is stored
in an array (in current implementation).

In the future, this LLC index could be removed after
the native llc_id is used as the key to search into xarray based
array.

Signed-off-by: Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx>
---
include/linux/sched.h | 3 +++
kernel/sched/fair.c | 12 ++++++++++++
kernel/sched/sched.h | 2 ++
kernel/sched/topology.c | 29 +++++++++++++++++++++++++++++
4 files changed, 46 insertions(+)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d0e4cda2b3cd..7ce95a32e9ff 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -810,6 +810,9 @@ struct kmap_ctrl {
#endif
};
+/* XXX need fix to not use magic number */
+#define MAX_LLC 64

This number needs to be much higher. maybe keeping NR_CPUS wont hurt.

+
struct task_struct {
#ifdef CONFIG_THREAD_INFO_IN_TASK
/*
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 10ea408d0e40..5549710d95cf 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1183,6 +1183,18 @@ static int llc_id(int cpu)
return per_cpu(sd_llc_id, cpu);
}
+/*
+ * continous index.
+ * TBD: replace by xarray with key llc_id()
+ */
+static inline int llc_idx(int cpu)
+{
+ if (cpu < 0)
+ return -1;
+
+ return per_cpu(sd_llc_idx, cpu);
+}
+
void mm_init_sched(struct mm_struct *mm, struct mm_sched __percpu *_pcpu_sched)
{
unsigned long epoch;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 1c6fd45c7f62..74eb2f3615aa 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2037,6 +2037,7 @@ static inline struct sched_domain *lowest_flag_domain(int cpu, int flag)
DECLARE_PER_CPU(struct sched_domain __rcu *, sd_llc);
DECLARE_PER_CPU(int, sd_llc_size);
DECLARE_PER_CPU(int, sd_llc_id);
+DECLARE_PER_CPU(int, sd_llc_idx);
DECLARE_PER_CPU(int, sd_share_id);
DECLARE_PER_CPU(struct sched_domain_shared __rcu *, sd_llc_shared);
DECLARE_PER_CPU(struct sched_domain __rcu *, sd_numa);
@@ -2045,6 +2046,7 @@ DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity);
extern struct static_key_false sched_asym_cpucapacity;
extern struct static_key_false sched_cluster_active;
+extern int max_llcs;
static __always_inline bool sched_asym_cpucap_active(void)
{
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index f1ebc60d967f..b7bb13045dd8 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -672,6 +672,7 @@ static void destroy_sched_domains(struct sched_domain *sd)
DEFINE_PER_CPU(struct sched_domain __rcu *, sd_llc);
DEFINE_PER_CPU(int, sd_llc_size);
DEFINE_PER_CPU(int, sd_llc_id);
+DEFINE_PER_CPU(int, sd_llc_idx);
DEFINE_PER_CPU(int, sd_share_id);
DEFINE_PER_CPU(struct sched_domain_shared __rcu *, sd_llc_shared);
DEFINE_PER_CPU(struct sched_domain __rcu *, sd_numa);
@@ -681,6 +682,25 @@ DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity);
DEFINE_STATIC_KEY_FALSE(sched_asym_cpucapacity);
DEFINE_STATIC_KEY_FALSE(sched_cluster_active);
+int max_llcs = -1;
+
+static void update_llc_idx(int cpu)
+{
+#ifdef CONFIG_SCHED_CACHE
+ int idx = -1, llc_id = -1;
+
+ llc_id = per_cpu(sd_llc_id, cpu);
+ idx = per_cpu(sd_llc_idx, llc_id);
+
+ if (idx < 0) {
+ idx = max_llcs++;
+ BUG_ON(idx > MAX_LLC);

maybe a warning instead here?

+ per_cpu(sd_llc_idx, llc_id) = idx;
+ }
+ per_cpu(sd_llc_idx, cpu) = idx;
+#endif
+}
+
static void update_top_cache_domain(int cpu)
{
struct sched_domain_shared *sds = NULL;
@@ -699,6 +719,7 @@ static void update_top_cache_domain(int cpu)
per_cpu(sd_llc_size, cpu) = size;
per_cpu(sd_llc_id, cpu) = id;
rcu_assign_pointer(per_cpu(sd_llc_shared, cpu), sds);
+ update_llc_idx(cpu);
sd = lowest_flag_domain(cpu, SD_CLUSTER);
if (sd)
@@ -2394,6 +2415,14 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
bool has_asym = false;
bool has_cluster = false;
+#ifdef CONFIG_SCHED_CACHE
+ if (max_llcs < 0) {
+ for_each_possible_cpu(i)
+ per_cpu(sd_llc_idx, i) = -1;
+ max_llcs = 0;
+ }
+#endif
+
if (WARN_ON(cpumask_empty(cpu_map)))
goto error;