[RFC 04/60] sched: Replace sd_numa_mask() hack with something sane

From: Jan H. Schönherr
Date: Fri Sep 07 2018 - 17:41:27 EST


Get rid of the global variable sched_domains_curr_level, which is used
to pass state into sd_numa_mask(), which is used as a callback for
sched_domain_topology_level->mask().

Extend the ->mask() callback instead, so that it takes the topology level
as an extra argument. Provide a backward compatible ->simple_mask()
callback, so that existing code can stay as it is.

This enables other users to do queries via ->mask() without having to
worry about the global variable. It also opens up the possibility for
more generic topologies that require a dynamic number of levels (similar
to what NUMA already does on top of the system topology).

Signed-off-by: Jan H. Schönherr <jschoenh@xxxxxxxxx>
---
include/linux/sched/topology.h | 11 ++++++++---
kernel/sched/topology.c | 40 ++++++++++++++++++++++------------------
2 files changed, 30 insertions(+), 21 deletions(-)

diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 530ad856372e..f78534f1cc1e 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -165,7 +165,11 @@ void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms);

bool cpus_share_cache(int this_cpu, int that_cpu);

-typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
+struct sched_domain_topology_level;
+
+typedef const struct cpumask *(*sched_domain_simple_mask_f)(int cpu);
+typedef const struct cpumask *(*sched_domain_mask_f)(struct sched_domain_topology_level *tl,
+ int cpu);
typedef int (*sched_domain_flags_f)(void);

#define SDTL_OVERLAP 0x01
@@ -178,10 +182,11 @@ struct sd_data {
};

struct sched_domain_topology_level {
- sched_domain_mask_f mask;
+ sched_domain_simple_mask_f simple_mask;
sched_domain_flags_f sd_flags;
+ sched_domain_mask_f mask;
int flags;
- int numa_level;
+ int level;
struct sd_data data;
#ifdef CONFIG_SCHED_DEBUG
char *name;
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 8b64f3f57d50..0f2c3aa0a097 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1043,7 +1043,6 @@ static void claim_allocations(int cpu, struct sched_domain *sd)
enum numa_topology_type sched_numa_topology_type;

static int sched_domains_numa_levels;
-static int sched_domains_curr_level;

int sched_max_numa_distance;
static int *sched_domains_numa_distance;
@@ -1084,15 +1083,9 @@ sd_init(struct sched_domain_topology_level *tl,
struct sd_data *sdd = &tl->data;
struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
int sd_id, sd_weight, sd_flags = 0;
+ const struct cpumask *mask = tl->mask(tl, cpu);

-#ifdef CONFIG_NUMA
- /*
- * Ugly hack to pass state to sd_numa_mask()...
- */
- sched_domains_curr_level = tl->numa_level;
-#endif
-
- sd_weight = cpumask_weight(tl->mask(cpu));
+ sd_weight = cpumask_weight(mask);

if (tl->sd_flags)
sd_flags = (*tl->sd_flags)();
@@ -1138,7 +1131,7 @@ sd_init(struct sched_domain_topology_level *tl,
#endif
};

- cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu));
+ cpumask_and(sched_domain_span(sd), cpu_map, mask);
sd_id = cpumask_first(sched_domain_span(sd));

/*
@@ -1170,7 +1163,7 @@ sd_init(struct sched_domain_topology_level *tl,
sd->idle_idx = 2;

sd->flags |= SD_SERIALIZE;
- if (sched_domains_numa_distance[tl->numa_level] > RECLAIM_DISTANCE) {
+ if (sched_domains_numa_distance[tl->level] > RECLAIM_DISTANCE) {
sd->flags &= ~(SD_BALANCE_EXEC |
SD_BALANCE_FORK |
SD_WAKE_AFFINE);
@@ -1195,17 +1188,23 @@ sd_init(struct sched_domain_topology_level *tl,
return sd;
}

+static const struct cpumask *
+sd_simple_mask(struct sched_domain_topology_level *tl, int cpu)
+{
+ return tl->simple_mask(cpu);
+}
+
/*
* Topology list, bottom-up.
*/
static struct sched_domain_topology_level default_topology[] = {
#ifdef CONFIG_SCHED_SMT
- { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
+ { cpu_smt_mask, cpu_smt_flags, sd_simple_mask, SD_INIT_NAME(SMT) },
#endif
#ifdef CONFIG_SCHED_MC
- { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
+ { cpu_coregroup_mask, cpu_core_flags, sd_simple_mask, SD_INIT_NAME(MC) },
#endif
- { cpu_cpu_mask, SD_INIT_NAME(DIE) },
+ { cpu_cpu_mask, NULL, sd_simple_mask, SD_INIT_NAME(DIE) },
{ NULL, },
};

@@ -1221,13 +1220,18 @@ void set_sched_topology(struct sched_domain_topology_level *tl)
return;

sched_domain_topology = tl;
+ for (; tl->mask || tl->simple_mask; tl++) {
+ if (tl->simple_mask)
+ tl->mask = sd_simple_mask;
+ }
}

#ifdef CONFIG_NUMA

-static const struct cpumask *sd_numa_mask(int cpu)
+static const struct cpumask *
+sd_numa_mask(struct sched_domain_topology_level *tl, int cpu)
{
- return sched_domains_numa_masks[sched_domains_curr_level][cpu_to_node(cpu)];
+ return sched_domains_numa_masks[tl->level][cpu_to_node(cpu)];
}

static void sched_numa_warn(const char *str)
@@ -1446,7 +1450,7 @@ void sched_init_numa(void)
*/
tl[i++] = (struct sched_domain_topology_level){
.mask = sd_numa_mask,
- .numa_level = 0,
+ .level = 0,
SD_INIT_NAME(NODE)
};

@@ -1458,7 +1462,7 @@ void sched_init_numa(void)
.mask = sd_numa_mask,
.sd_flags = cpu_numa_flags,
.flags = SDTL_OVERLAP,
- .numa_level = j,
+ .level = j,
SD_INIT_NAME(NUMA)
};
}
--
2.9.3.1.gcba166c.dirty