Re: [patch v3] sched: fix select_idle_sibling() induced bouncing

From: Suresh Siddha
Date: Mon Jun 11 2012 - 14:54:00 EST


On Mon, 2012-06-11 at 19:55 +0200, Mike Galbraith wrote:
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -5928,6 +5928,11 @@ static void destroy_sched_domains(struct
> * SD_SHARE_PKG_RESOURCE set (Last Level Cache Domain) for this
> * allows us to avoid some pointer chasing select_idle_sibling().
> *
> + * Iterate domains and sched_groups upward, assigning CPUs to be

You are actually iterating downwards (starting from the highest domain
with the SD_SHARE_PKG_RESOURCES flag) in the patch.

> + * select_idle_sibling() hw buddy. Cross-wiring hw makes bouncing
> + * due to random perturbation self canceling, ie sw buddies pull
> + * their counterpart to their CPU's hw counterpart.
> + *

Also, it would be nice to include all the data you have observed as part
of the changelog.

thanks,
suresh

> * Also keep a unique ID per domain (we use the first cpu number in
> * the cpumask of the domain), this allows us to quickly tell if
> * two cpus are in the same cache domain, see cpus_share_cache().
> @@ -5943,8 +5948,40 @@ static void update_domain_cache(int cpu)
> int id = cpu;
>
> sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES);
> - if (sd)
> + if (sd) {
> + struct sched_domain *tmp = sd;
> + struct sched_group *sg, *prev;
> + bool right;
> +
> + /*
> + * Traverse to first CPU in group, and count hops
> + * to cpu from there, switching direction on each
> + * hop, never ever pointing the last CPU rightward.
> + */
> + do {
> + id = cpumask_first(sched_domain_span(tmp));
> + prev = sg = tmp->groups;
> + right = 1;
> +
> + while (cpumask_first(sched_group_cpus(sg)) != id)
> + sg = sg->next;
> +
> + while (!cpumask_test_cpu(cpu, sched_group_cpus(sg))) {
> + prev = sg;
> + sg = sg->next;
> + right = !right;
> + }
> +
> + /* A CPU went down, never point back to domain start. */
> + if (right && cpumask_first(sched_group_cpus(sg->next)) == id)
> + right = false;
> +
> + sg = right? sg->next : prev;
> + tmp->idle_buddy = cpumask_first(sched_group_cpus(sg));
> + } while ((tmp = tmp->child));
> +
> id = cpumask_first(sched_domain_span(sd));
> + }
>
> rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);
> per_cpu(sd_llc_id, cpu) = id;
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -2642,8 +2642,6 @@ static int select_idle_sibling(struct ta
> int cpu = smp_processor_id();
> int prev_cpu = task_cpu(p);
> struct sched_domain *sd;
> - struct sched_group *sg;
> - int i;
>
> /*
> * If the task is going to be woken-up on this cpu and if it is
> @@ -2660,29 +2658,17 @@ static int select_idle_sibling(struct ta
> return prev_cpu;
>
> /*
> - * Otherwise, iterate the domains and find an elegible idle cpu.
> + * Otherwise, check assigned siblings to find an eligible idle cpu.
> */
> sd = rcu_dereference(per_cpu(sd_llc, target));
> +
> for_each_lower_domain(sd) {
> - sg = sd->groups;
> - do {
> - if (!cpumask_intersects(sched_group_cpus(sg),
> - tsk_cpus_allowed(p)))
> - goto next;
> -
> - for_each_cpu(i, sched_group_cpus(sg)) {
> - if (!idle_cpu(i))
> - goto next;
> - }
> -
> - target = cpumask_first_and(sched_group_cpus(sg),
> - tsk_cpus_allowed(p));
> - goto done;
> -next:
> - sg = sg->next;
> - } while (sg != sd->groups);
> + if (!cpumask_test_cpu(sd->idle_buddy, tsk_cpus_allowed(p)))
> + continue;
> + if (idle_cpu(sd->idle_buddy))
> + return sd->idle_buddy;
> }
> -done:
> +
> return target;
> }
>
>
>


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/