Re: [PATCH 4/4] sched/topology: the group balance cpu must be a cpu where the group is installed

From: Peter Zijlstra
Date: Tue Apr 25 2017 - 12:26:56 EST


On Tue, Apr 25, 2017 at 12:56:23PM -0300, Lauro Venancio wrote:

> > Another thing I've been thinking about; I think we can do away with the
> > kzalloc() in build_group_from_child_sched_domain() and use the sdd->sg
> > storage.
> I considered this too. I decided not to change this because I was not
> sure if the kzalloc() was there for performance reasons. Currently, all
> groups are allocated in the NUMA node where they are used.
> If we use sdd->sg storage, we may have groups allocated in one NUMA node
> being used in another node.

Right.. I cannot remember :/

/me once again kicks himself for not writing more comments

It does save a few lines.. and I suspect that if we do this, we could
actually completely get rid of sched_group_capacity, since it's now
always the same as the group (again), which should remove more lines
still.

But I'll shelve this patch for now.. we've got enough changes as is.

I still need to write a changelog for the new #2, which has become ugly
again, because it needs a second sched_domains_tmpmask.

(compile tested only)

---
kernel/sched/topology.c | 76 ++++++++++++++++++------------------------------
1 file changed, 29 insertions(+), 47 deletions(-)

--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -501,10 +501,8 @@ enum s_alloc {
* balancing.
*/
static void
-build_group_mask(struct sched_domain *sd, struct sched_group *sg, struct cpumask *mask)
+build_group_mask(struct sd_data *sdd, struct cpumask *sg_span, struct cpumask *mask)
{
- const struct cpumask *sg_span = sched_group_cpus(sg);
- struct sd_data *sdd = sd->private;
struct sched_domain *sibling;
int i;

@@ -542,49 +540,34 @@ int group_balance_cpu(struct sched_group
}

static struct sched_group *
-build_group_from_child_sched_domain(struct sched_domain *sd, int cpu)
+get_overlap_group(struct sd_data *sdd, int cpu)
{
- struct sched_group *sg;
- struct cpumask *sg_span;
+ struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
+ struct sched_domain *child = sd->child;
+ struct sched_group *group;
+ struct cpumask *mask = sched_domains_tmpmask2;

- sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
- GFP_KERNEL, cpu_to_node(cpu));
+ /*
+ * Overlap must have !overlap children.
+ * This is before degenerate throws them out.
+ */
+ BUG_ON(!sd->child);

- if (!sg)
- return NULL;
+ build_group_mask(sdd, sched_domain_span(child), mask);
+ cpu = cpumask_first_and(sched_domain_span(child), mask);

- sg_span = sched_group_cpus(sg);
- if (sd->child)
- cpumask_copy(sg_span, sched_domain_span(sd->child));
- else
- cpumask_copy(sg_span, sched_domain_span(sd));
+ BUG_ON(cpu >= nr_cpu_ids);

- return sg;
-}
+ group = *per_cpu_ptr(sdd->sg, cpu);
+ group->sgc = *per_cpu_ptr(sdd->sgc, cpu);

-static void init_overlap_sched_group(struct sched_domain *sd,
- struct sched_group *sg)
-{
- struct cpumask *mask = sched_domains_tmpmask2;
- struct sd_data *sdd = sd->private;
- struct cpumask *sg_span;
- int cpu;
+ atomic_inc(&group->ref);
+ atomic_inc(&group->sgc->ref);

- build_group_mask(sd, sg, mask);
- cpu = cpumask_first_and(sched_group_cpus(sg), mask);
+ cpumask_copy(sched_group_cpus(group), sched_domain_span(child));
+ cpumask_copy(sched_group_mask(group), mask);

- sg->sgc = *per_cpu_ptr(sdd->sgc, cpu);
- if (atomic_inc_return(&sg->sgc->ref) == 1)
- cpumask_copy(sched_group_mask(sg), mask);
-
- /*
- * Initialize sgc->capacity such that even if we mess up the
- * domains and no possible iteration will get us here, we won't
- * die on a /0 trap.
- */
- sg_span = sched_group_cpus(sg);
- sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
- sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;
+ return group;
}

static int
@@ -620,14 +603,18 @@ build_overlap_sched_groups(struct sched_
if (!cpumask_test_cpu(i, sched_domain_span(sibling)))
continue;

- sg = build_group_from_child_sched_domain(sibling, cpu);
- if (!sg)
- goto fail;
+ sg = get_overlap_group(sdd, i);

sg_span = sched_group_cpus(sg);
cpumask_or(covered, covered, sg_span);

- init_overlap_sched_group(sd, sg);
+ /*
+ * Initialize sgc->capacity such that even if we mess up the
+ * domains and no possible iteration will get us here, we won't
+ * die on a /0 trap.
+ */
+ sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
+ sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;

if (!first)
first = sg;
@@ -639,11 +626,6 @@ build_overlap_sched_groups(struct sched_
sd->groups = first;

return 0;
-
-fail:
- free_sched_groups(first, 0);
-
- return -ENOMEM;
}

static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg)