Re: [PATCH v6 04/11] cpufreq/schedutil: use rt utilization tracking

From: Peter Zijlstra
Date: Fri Jun 22 2018 - 11:23:15 EST


On Fri, Jun 22, 2018 at 01:37:13PM +0200, Peter Zijlstra wrote:
> That is true.. So we could limit the scaling to the case where there is
> no idle time, something like:
>
> util = sg_cpu->util_cfs;
>
> cap_cfs = (1024 - (sg_cpu->util_rt + ...));
> if (util == cap_cfs)
> util = sg_cpu->max;
>

OK, it appears this is more or less what the patches do. And I think
there's a small risk/hole with this: util may not be exactly equal to
cap_cfs, yet be very close to it, due to some unaccounted time, in which
case the "no idle time" check would not trigger.

FWIW, while looking at this, I saw no reason why sugov_get_util() and
sugov_aggregate_util() should be separate functions, so the patch below
folds them into one.

--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -53,11 +53,7 @@ struct sugov_cpu {
unsigned int iowait_boost_max;
u64 last_update;

- /* The fields below are only needed when sharing a policy: */
- unsigned long util_cfs;
unsigned long util_dl;
- unsigned long bw_dl;
- unsigned long util_rt;
unsigned long max;

/* The field below is for single-CPU policies only: */
@@ -181,44 +177,38 @@ static unsigned int get_next_freq(struct
return cpufreq_driver_resolve_freq(policy, freq);
}

-static void sugov_get_util(struct sugov_cpu *sg_cpu)
+static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu)
{
struct rq *rq = cpu_rq(sg_cpu->cpu);
+ unsigned long util, max;

- sg_cpu->max = arch_scale_cpu_capacity(NULL, sg_cpu->cpu);
- sg_cpu->util_cfs = cpu_util_cfs(rq);
- sg_cpu->util_dl = cpu_util_dl(rq);
- sg_cpu->bw_dl = cpu_bw_dl(rq);
- sg_cpu->util_rt = cpu_util_rt(rq);
-}
-
-static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu)
-{
- struct rq *rq = cpu_rq(sg_cpu->cpu);
- unsigned long util;
+ sg_cpu->max = max = arch_scale_cpu_capacity(NULL, sg_cpu->cpu);
+ sg_cpu->util_dl = cpu_util_dl(rq);

if (rq->rt.rt_nr_running)
- return sg_cpu->max;
+ return max;

- util = sg_cpu->util_cfs;
- util += sg_cpu->util_rt;
+ util = cpu_util_cfs(rq);
+ util += cpu_util_rt(rq);

- if ((util + sg_cpu->util_dl) >= sg_cpu->max)
- return sg_cpu->max;
+ /*
+ * If there is no idle time, we should run at max frequency.
+ */
+ if ((util + cpu_util_dl(rq)) >= max)
+ return max;

/*
- * As there is still idle time on the CPU, we need to compute the
- * utilization level of the CPU.
* Bandwidth required by DEADLINE must always be granted while, for
* FAIR and RT, we use blocked utilization of IDLE CPUs as a mechanism
* to gracefully reduce the frequency when no tasks show up for longer
* periods of time.
+ *
+ * Ideally we would like to set bw_dl as min/guaranteed freq and bw_dl
+ * + util as requested freq. However, cpufreq is not yet ready for such
+ * an interface. So, we only do the latter for now.
*/

- /* Add DL bandwidth requirement */
- util += sg_cpu->bw_dl;
-
- return min(sg_cpu->max, util);
+ return min(max, cpu_bw_dl(rq) + util);
}

/**
@@ -396,9 +386,8 @@ static void sugov_update_single(struct u

busy = sugov_cpu_is_busy(sg_cpu);

- sugov_get_util(sg_cpu);
+ util = sugov_get_util(sg_cpu);
max = sg_cpu->max;
- util = sugov_aggregate_util(sg_cpu);
sugov_iowait_apply(sg_cpu, time, &util, &max);
next_f = get_next_freq(sg_policy, util, max);
/*
@@ -437,9 +426,8 @@ static unsigned int sugov_next_freq_shar
struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j);
unsigned long j_util, j_max;

- sugov_get_util(j_sg_cpu);
+ j_util = sugov_get_util(j_sg_cpu);
j_max = j_sg_cpu->max;
- j_util = sugov_aggregate_util(j_sg_cpu);
sugov_iowait_apply(j_sg_cpu, time, &j_util, &j_max);

if (j_util * max > j_max * util) {