Re: [PATCH 3/5] sched/fair: rework load_balance

From: Peter Zijlstra
Date: Fri Jul 19 2019 - 08:54:51 EST


On Fri, Jul 19, 2019 at 09:58:23AM +0200, Vincent Guittot wrote:

> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 67f0acd..472959df 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -5376,18 +5376,6 @@ static unsigned long capacity_of(int cpu)
> return cpu_rq(cpu)->cpu_capacity;
> }
>
> -static unsigned long cpu_avg_load_per_task(int cpu)
> -{
> - struct rq *rq = cpu_rq(cpu);
> - unsigned long nr_running = READ_ONCE(rq->cfs.h_nr_running);
> - unsigned long load_avg = cpu_runnable_load(rq);
> -
> - if (nr_running)
> - return load_avg / nr_running;
> -
> - return 0;
> -}
> -
> static void record_wakee(struct task_struct *p)
> {
> /*

> @@ -7646,7 +7669,6 @@ static unsigned long task_h_load(struct task_struct *p)
> struct sg_lb_stats {
> unsigned long avg_load; /*Avg load across the CPUs of the group */
> unsigned long group_load; /* Total load over the CPUs of the group */
> - unsigned long load_per_task;
> unsigned long group_capacity;
> unsigned long group_util; /* Total utilization of the group */
> unsigned int sum_nr_running; /* Nr tasks running in the group */


> @@ -8266,76 +8293,6 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
> }
>
> /**
> - * fix_small_imbalance - Calculate the minor imbalance that exists
> - * amongst the groups of a sched_domain, during
> - * load balancing.
> - * @env: The load balancing environment.
> - * @sds: Statistics of the sched_domain whose imbalance is to be calculated.
> - */
> -static inline
> -void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds)
> -{
> - unsigned long tmp, capa_now = 0, capa_move = 0;
> - unsigned int imbn = 2;
> - unsigned long scaled_busy_load_per_task;
> - struct sg_lb_stats *local, *busiest;
> -
> - local = &sds->local_stat;
> - busiest = &sds->busiest_stat;
> -
> - if (!local->sum_h_nr_running)
> - local->load_per_task = cpu_avg_load_per_task(env->dst_cpu);
> - else if (busiest->load_per_task > local->load_per_task)
> - imbn = 1;
> -
> - scaled_busy_load_per_task =
> - (busiest->load_per_task * SCHED_CAPACITY_SCALE) /
> - busiest->group_capacity;
> -
> - if (busiest->avg_load + scaled_busy_load_per_task >=
> - local->avg_load + (scaled_busy_load_per_task * imbn)) {
> - env->imbalance = busiest->load_per_task;
> - return;
> - }
> -
> - /*
> - * OK, we don't have enough imbalance to justify moving tasks,
> - * however we may be able to increase total CPU capacity used by
> - * moving them.
> - */
> -
> - capa_now += busiest->group_capacity *
> - min(busiest->load_per_task, busiest->avg_load);
> - capa_now += local->group_capacity *
> - min(local->load_per_task, local->avg_load);
> - capa_now /= SCHED_CAPACITY_SCALE;
> -
> - /* Amount of load we'd subtract */
> - if (busiest->avg_load > scaled_busy_load_per_task) {
> - capa_move += busiest->group_capacity *
> - min(busiest->load_per_task,
> - busiest->avg_load - scaled_busy_load_per_task);
> - }
> -
> - /* Amount of load we'd add */
> - if (busiest->avg_load * busiest->group_capacity <
> - busiest->load_per_task * SCHED_CAPACITY_SCALE) {
> - tmp = (busiest->avg_load * busiest->group_capacity) /
> - local->group_capacity;
> - } else {
> - tmp = (busiest->load_per_task * SCHED_CAPACITY_SCALE) /
> - local->group_capacity;
> - }
> - capa_move += local->group_capacity *
> - min(local->load_per_task, local->avg_load + tmp);
> - capa_move /= SCHED_CAPACITY_SCALE;
> -
> - /* Move if we gain throughput */
> - if (capa_move > capa_now)
> - env->imbalance = busiest->load_per_task;
> -}
> -
> -/**
> * calculate_imbalance - Calculate the amount of imbalance present within the
> * groups of a given sched_domain during load balance.
> * @env: load balance environment

Maybe strip this out first, in a separate patch. It's all magic doo-doo.