Re: [Patch v4 5/6] thermal/cpu-cooling: Update thermal pressure in case of a maximum frequency capping
From: Dietmar Eggemann
Date:  Thu Oct 31 2019 - 12:29:52 EST
On 22.10.19 22:34, Thara Gopinath wrote:
> Thermal governors can request for a cpu's maximum supported frequency
> to be capped in case of an overheat event. This in turn means that the
> maximum capacity available for tasks to run on the particular cpu is
> reduced. Delta between the original maximum capacity and capped
> maximum capacity is known as thermal pressure. Enable cpufreq cooling
> device to update the thermal pressure in event of a capped
> maximum frequency.
> 
> Signed-off-by: Thara Gopinath <thara.gopinath@xxxxxxxxxx>
> ---
>  drivers/thermal/cpu_cooling.c | 31 +++++++++++++++++++++++++++++--
>  1 file changed, 29 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
> index 391f397..2e6a979 100644
> --- a/drivers/thermal/cpu_cooling.c
> +++ b/drivers/thermal/cpu_cooling.c
> @@ -218,6 +218,23 @@ static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_cdev,
>  }
>  
>  /**
> + * update_sched_max_capacity - update scheduler about change in cpu
> + *					max frequency.
> + * @policy - cpufreq policy whose max frequency is capped.
> + */
> +static void update_sched_max_capacity(struct cpumask *cpus,
> +				      unsigned int cur_max_freq,
> +				      unsigned int max_freq)
> +{
> +	int cpu;
> +	unsigned long capacity = (cur_max_freq << SCHED_CAPACITY_SHIFT) /
> +				  max_freq;
> +
> +	for_each_cpu(cpu, cpus)
> +		update_thermal_pressure(cpu, capacity);
> +}
> +
> +/**
>   * get_load() - get load for a cpu since last updated
>   * @cpufreq_cdev:	&struct cpufreq_cooling_device for this cpu
>   * @cpu:	cpu number
> @@ -320,6 +337,7 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
>  				 unsigned long state)
>  {
>  	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
> +	int ret;
>  
>  	/* Request state should be less than max_level */
>  	if (WARN_ON(state > cpufreq_cdev->max_level))
> @@ -331,8 +349,17 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
>  
>  	cpufreq_cdev->cpufreq_state = state;
>  
> -	return dev_pm_qos_update_request(&cpufreq_cdev->qos_req,
> -				cpufreq_cdev->freq_table[state].frequency);
> +	ret = dev_pm_qos_update_request
> +				(&cpufreq_cdev->qos_req,
> +				 cpufreq_cdev->freq_table[state].frequency);
> +
> +	if (ret > 0)
> +		update_sched_max_capacity
> +				(cpufreq_cdev->policy->cpus,
> +				 cpufreq_cdev->freq_table[state].frequency,
> +				 cpufreq_cdev->policy->cpuinfo.max_freq);
> +
> +	return ret;
>  }
>  
>  /**
> 
Why not get rid of update_sched_max_capacity() entirely and call
update_thermal_pressure() from cpu_cooling.c directly? That saves one level in
the call chain and means less code for this feature.
Only compile-tested on arm64:
diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index 3211b4d3a899..bf36995013b0 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -217,23 +217,6 @@ static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_cdev,
        return freq_table[i - 1].frequency;
 }
-/**
- * update_sched_max_capacity - update scheduler about change in cpu
- *                                     max frequency.
- * @policy - cpufreq policy whose max frequency is capped.
- */
-static void update_sched_max_capacity(struct cpumask *cpus,
-                                     unsigned int cur_max_freq,
-                                     unsigned int max_freq)
-{
-       int cpu;
-       unsigned long capacity = (cur_max_freq << SCHED_CAPACITY_SHIFT) /
-                                 max_freq;
-
-       for_each_cpu(cpu, cpus)
-               update_thermal_pressure(cpu, capacity);
-}
-
 /**
  * get_load() - get load for a cpu since last updated
  * @cpufreq_cdev:      &struct cpufreq_cooling_device for this cpu
@@ -353,7 +336,7 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
                                cpufreq_cdev->freq_table[state].frequency);
        if (ret > 0)
-               update_sched_max_capacity
+               update_thermal_pressure
                                (cpufreq_cdev->policy->cpus,
                                 cpufreq_cdev->freq_table[state].frequency,
                                 cpufreq_cdev->policy->cpuinfo.max_freq);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 55dfe9634f67..5707813c7621 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1985,9 +1985,9 @@ static inline void rseq_syscall(struct pt_regs *regs)
 #endif
 #ifdef CONFIG_SMP
-void update_thermal_pressure(int cpu, u64 capacity);
+void update_thermal_pressure(struct cpumask *cpus, unsigned int cur, unsigned int max);
 #else
-static inline void update_thermal_pressure(int cpu, u64 capacity)
+static inline void update_thermal_pressure(struct cpumask *cpus, unsigned int cur, unsigned int max)
 {
 }
 #endif
diff --git a/kernel/sched/thermal.c b/kernel/sched/thermal.c
index 0da31e12a5ff..691bdd79597a 100644
--- a/kernel/sched/thermal.c
+++ b/kernel/sched/thermal.c
@@ -43,17 +43,16 @@ static DEFINE_PER_CPU(unsigned long, delta_capacity);
  * the arch_scale_cpu_capacity and capped capacity is stored in per cpu
  * delta_capacity.
  */
-void update_thermal_pressure(int cpu, u64 capped_freq_ratio)
+void update_thermal_pressure(struct cpumask *cpus, unsigned int cur, unsigned int max)
 {
-       unsigned long __capacity, delta;
+       int cpu;
-       /* Normalize the capped freq ratio */
-       __capacity = (capped_freq_ratio * arch_scale_cpu_capacity(cpu)) >>
-                                                       SCHED_CAPACITY_SHIFT;
-       delta = arch_scale_cpu_capacity(cpu) -  __capacity;
-       pr_debug("updating cpu%d thermal pressure to %lu\n", cpu, delta);
+       for_each_cpu(cpu, cpus) {
+               unsigned long scale_cap = arch_scale_cpu_capacity(cpu);
+               unsigned long cur_cap = cur * scale_cap / max;
-       per_cpu(delta_capacity, cpu) = delta;
+               per_cpu(delta_capacity, cpu) = scale_cap - cur_cap;
+       }
 }