Re: [PATCH v7 4/8] arm64: parse cpu capacity-dmips-mhz from DT

From: Vincent Guittot
Date: Mon Sep 12 2016 - 04:38:22 EST


Hi Juri,

On 5 September 2016 at 16:22, Juri Lelli <juri.lelli@xxxxxxx> wrote:
> With the introduction of cpu capacity-dmips-mhz bindings, CPU capacities
> can now be calculated from values extracted from DT and information
> coming from cpufreq. Add parsing of DT information at boot time, and
> complement it with cpufreq information. Also, store such information
> using per CPU variables, as we do for arm.
>
> Caveat: the information provided by this patch will start to be used in
> the future. We need to #define arch_scale_cpu_capacity to something
> provided in arch, so that scheduler's default implementation (which gets
> used if arch_scale_cpu_capacity is not defined) is overwritten.
>
> Cc: Catalin Marinas <catalin.marinas@xxxxxxx>
> Cc: Will Deacon <will.deacon@xxxxxxx>
> Cc: Mark Brown <broonie@xxxxxxxxxx>
> Cc: Sudeep Holla <sudeep.holla@xxxxxxx>
> Signed-off-by: Juri Lelli <juri.lelli@xxxxxxx>

Acked-by: Vincent Guittot <vincent.guittot@xxxxxxxxxx>

> ---
>
> Changes from v1:
> - normalize w.r.t. highest capacity found in DT
> - bailout conditions (all-or-nothing)
>
> Changes from v4:
> - parsing modified to reflect change in binding (capacity-dmips-mhz)
>
> Changes from v5:
> - allocate raw_capacity array with kcalloc()
> - pr_err() only for partial capacity information
>
> Changes from v6:
> - use cpuinfo.max_freq instead of policy->max
> - add delayed work to unregister cpufreq notifier
> ---
> arch/arm64/kernel/topology.c | 159 ++++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 158 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
> index 694f6deedbab..b75b0ba2e113 100644
> --- a/arch/arm64/kernel/topology.c
> +++ b/arch/arm64/kernel/topology.c
> @@ -19,10 +19,162 @@
> #include <linux/nodemask.h>
> #include <linux/of.h>
> #include <linux/sched.h>
> +#include <linux/slab.h>
> +#include <linux/cpufreq.h>
>
> #include <asm/cputype.h>
> #include <asm/topology.h>
>
> +/* Per-CPU capacity value; every CPU starts out at SCHED_CAPACITY_SCALE. */
> +static DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
> +
> +/*
> + * Return the capacity currently stored in cpu_scale for @cpu.
> + * @sd is accepted for the scheduler-facing signature but is not consulted.
> + */
> +unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu)
> +{
> +	unsigned long scale = per_cpu(cpu_scale, cpu);
> +
> +	return scale;
> +}
> +
> +/* Store @capacity as the cpu_scale value of @cpu. */
> +static void set_capacity_scale(unsigned int cpu, unsigned long capacity)
> +{
> +	unsigned long *slot = &per_cpu(cpu_scale, cpu);
> +
> +	*slot = capacity;
> +}
> +
> +static u32 capacity_scale; /* largest raw capacity seen so far; divisor used when normalizing */
> +static u32 *raw_capacity; /* raw capacity per CPU, indexed by CPU number; allocated lazily */
> +static bool cap_parsing_failed; /* set on any parsing error; later stages check it and bail out */
> +
> +/*
> + * Read the "capacity-dmips-mhz" property of @cpu_node and record it as the
> + * raw capacity of @cpu, tracking the largest value seen in capacity_scale.
> + *
> + * Parsing is all-or-nothing: the first CPU node without the property, or a
> + * failed allocation, sets cap_parsing_failed, after which every later call
> + * returns immediately. raw_capacity is allocated on the first successful
> + * read and released again if a later node lacks the property.
> + */
> +static void __init parse_cpu_capacity(struct device_node *cpu_node, int cpu)
> +{
> +	int ret;
> +	u32 cpu_capacity;
> +
> +	if (cap_parsing_failed)
> +		return;
> +
> +	ret = of_property_read_u32(cpu_node,
> +				   "capacity-dmips-mhz",
> +				   &cpu_capacity);
> +	if (!ret) {
> +		if (!raw_capacity) {
> +			raw_capacity = kcalloc(num_possible_cpus(),
> +					       sizeof(*raw_capacity),
> +					       GFP_KERNEL);
> +			if (!raw_capacity) {
> +				pr_err("cpu_capacity: failed to allocate memory for raw capacities\n");
> +				cap_parsing_failed = true;
> +				return;
> +			}
> +		}
> +		capacity_scale = max(cpu_capacity, capacity_scale);
> +		raw_capacity[cpu] = cpu_capacity;
> +		pr_debug("cpu_capacity: %s cpu_capacity=%u (raw)\n",
> +			 cpu_node->full_name, raw_capacity[cpu]);
> +	} else {
> +		/* Only complain when some CPUs had the property and this one does not. */
> +		if (raw_capacity) {
> +			pr_err("cpu_capacity: missing %s raw capacity\n",
> +			       cpu_node->full_name);
> +			pr_err("cpu_capacity: partial information: fallback to 1024 for all CPUs\n");
> +		}
> +		cap_parsing_failed = true;
> +		kfree(raw_capacity);
> +		/*
> +		 * Do not leave a dangling pointer behind: free_raw_capacity()
> +		 * calls kfree() on raw_capacity again at initcall time, which
> +		 * would otherwise be a double free.
> +		 */
> +		raw_capacity = NULL;
> +	}
> +}
> +
> +/*
> + * Scale every possible CPU's raw capacity so that the largest one maps to
> + * 1 << SCHED_CAPACITY_SHIFT, and store the result through
> + * set_capacity_scale(). Does nothing when no raw capacities are available
> + * or parsing has failed.
> + */
> +static void normalize_cpu_capacity(void)
> +{
> +	u64 capacity;
> +	int cpu;
> +
> +	if (!raw_capacity || cap_parsing_failed)
> +		return;
> +
> +	/* Every raw capacity is zero: there is nothing to scale against. */
> +	if (!capacity_scale) {
> +		pr_err("cpu_capacity: zero capacity_scale: keeping current capacities\n");
> +		return;
> +	}
> +
> +	pr_debug("cpu_capacity: capacity_scale=%u\n", capacity_scale);
> +	for_each_possible_cpu(cpu) {
> +		pr_debug("cpu_capacity: cpu=%d raw_capacity=%u\n",
> +			 cpu, raw_capacity[cpu]);
> +		/* Widen before shifting so large raw values cannot wrap in 32 bits. */
> +		capacity = ((u64)raw_capacity[cpu] << SCHED_CAPACITY_SHIFT)
> +			/ capacity_scale;
> +		set_capacity_scale(cpu, capacity);
> +		pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n",
> +			 cpu, arch_scale_cpu_capacity(NULL, cpu));
> +	}
> +}
> +
> +#ifdef CONFIG_CPU_FREQ
> +static cpumask_var_t cpus_to_visit; /* CPUs whose cpufreq policy has not been seen by the callback yet */
> +static bool cap_parsing_done; /* set once cpus_to_visit is empty and capacities were normalized */
> +static void parsing_done_workfn(struct work_struct *work);
> +static DECLARE_WORK(parsing_done_work, parsing_done_workfn); /* scheduled from the callback to unregister the notifier */
> +
> +/*
> + * cpufreq policy notifier callback.
> + *
> + * On CPUFREQ_NOTIFY, every CPU in policy->related_cpus is removed from
> + * cpus_to_visit and its raw capacity is recomputed as the current capacity
> + * multiplied by the policy's cpuinfo.max_freq (divided by 1000). Once no CPU
> + * is left to visit, capacities are normalized again, raw_capacity is
> + * released and parsing_done_work is scheduled to unregister the notifier.
> + *
> + * Always returns 0.
> + */
> +static int
> +init_cpu_capacity_callback(struct notifier_block *nb,
> +			   unsigned long val,
> +			   void *data)
> +{
> +	struct cpufreq_policy *policy = data;
> +	int cpu;
> +
> +	if (cap_parsing_failed || cap_parsing_done)
> +		return 0;
> +
> +	/*
> +	 * raw_capacity is still NULL if parse_cpu_capacity() never recorded a
> +	 * value; there is nothing to update and the array must not be touched.
> +	 */
> +	if (!raw_capacity)
> +		return 0;
> +
> +	switch (val) {
> +	case CPUFREQ_NOTIFY:
> +		pr_debug("cpu_capacity: init cpu capacity for CPUs [%*pbl] (to_visit=%*pbl)\n",
> +			 cpumask_pr_args(policy->related_cpus),
> +			 cpumask_pr_args(cpus_to_visit));
> +		cpumask_andnot(cpus_to_visit,
> +			       cpus_to_visit,
> +			       policy->related_cpus);
> +		for_each_cpu(cpu, policy->related_cpus) {
> +			raw_capacity[cpu] = arch_scale_cpu_capacity(NULL, cpu) *
> +					    policy->cpuinfo.max_freq / 1000UL;
> +			capacity_scale = max(raw_capacity[cpu], capacity_scale);
> +		}
> +		if (cpumask_empty(cpus_to_visit)) {
> +			normalize_cpu_capacity();
> +			kfree(raw_capacity);
> +			/* Do not keep a pointer to freed memory around. */
> +			raw_capacity = NULL;
> +			pr_debug("cpu_capacity: parsing done\n");
> +			cap_parsing_done = true;
> +			schedule_work(&parsing_done_work);
> +		}
> +	}
> +	return 0;
> +}
> +
> +static struct notifier_block init_cpu_capacity_notifier = {
> + .notifier_call = init_cpu_capacity_callback, /* registered on the cpufreq policy notifier chain */
> +};
> +
> +/*
> + * Register init_cpu_capacity_notifier on the cpufreq policy notifier chain
> + * so that raw capacities can be complemented with cpufreq information.
> + *
> + * Returns -EINVAL when capacity parsing failed or produced no raw
> + * capacities, -ENOMEM when cpus_to_visit cannot be allocated, otherwise the
> + * result of cpufreq_register_notifier().
> + */
> +static int __init register_cpufreq_notifier(void)
> +{
> +	int ret;
> +
> +	/*
> +	 * Without a raw_capacity array the callback has nothing to work on,
> +	 * so do not register it at all.
> +	 */
> +	if (cap_parsing_failed || !raw_capacity)
> +		return -EINVAL;
> +
> +	if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL)) {
> +		pr_err("cpu_capacity: failed to allocate memory for cpus_to_visit\n");
> +		return -ENOMEM;
> +	}
> +	cpumask_copy(cpus_to_visit, cpu_possible_mask);
> +
> +	ret = cpufreq_register_notifier(&init_cpu_capacity_notifier,
> +					CPUFREQ_POLICY_NOTIFIER);
> +	/* The callback will never run, so nobody else would release the mask. */
> +	if (ret)
> +		free_cpumask_var(cpus_to_visit);
> +
> +	return ret;
> +}
> +core_initcall(register_cpufreq_notifier);
> +
> +/*
> + * Work item scheduled by init_cpu_capacity_callback() once every CPU has
> + * been visited: unregister the cpufreq notifier and release cpus_to_visit.
> + */
> +static void parsing_done_workfn(struct work_struct *work)
> +{
> +	cpufreq_unregister_notifier(&init_cpu_capacity_notifier,
> +				    CPUFREQ_POLICY_NOTIFIER);
> +	/*
> +	 * The callback returns before touching cpus_to_visit once
> +	 * cap_parsing_done is set, and it is unregistered now, so the mask
> +	 * is no longer needed.
> +	 */
> +	free_cpumask_var(cpus_to_visit);
> +}
> +
> +#else
> +/*
> + * Without cpufreq there is no second parsing stage, so the raw capacities
> + * are no longer needed once initcalls run. Always returns 0.
> + */
> +static int __init free_raw_capacity(void)
> +{
> +	/*
> +	 * When cap_parsing_failed is set, raw_capacity was either never
> +	 * allocated or has already been passed to kfree() by
> +	 * parse_cpu_capacity(); freeing it here again would be a double free.
> +	 */
> +	if (!cap_parsing_failed)
> +		kfree(raw_capacity);
> +	raw_capacity = NULL;
> +
> +	return 0;
> +}
> +core_initcall(free_raw_capacity);
> +#endif
> +
> static int __init get_cpu_for_node(struct device_node *node)
> {
> struct device_node *cpu_node;
> @@ -34,6 +186,7 @@ static int __init get_cpu_for_node(struct device_node *node)
>
> for_each_possible_cpu(cpu) {
> if (of_get_cpu_node(cpu, NULL) == cpu_node) {
> + parse_cpu_capacity(cpu_node, cpu);
> of_node_put(cpu_node);
> return cpu;
> }
> @@ -178,13 +331,17 @@ static int __init parse_dt_topology(void)
> * cluster with restricted subnodes.
> */
> map = of_get_child_by_name(cn, "cpu-map");
> - if (!map)
> + if (!map) {
> + cap_parsing_failed = true;
> goto out;
> + }
>
> ret = parse_cluster(map, 0);
> if (ret != 0)
> goto out_map;
>
> + normalize_cpu_capacity();
> +
> /*
> * Check that all cores are in the topology; the SMP code will
> * only mark cores described in the DT as possible.
> --
> 2.7.0
>