Re: [PATCH] cpufreq: ondemand: Change the calculation of target frequency

From: Viresh Kumar
Date: Fri May 31 2013 - 04:52:05 EST


Sorry for joining the party so late.. I was running away from reviewing it :(

On 31 May 2013 02:37, Stratos Karafotis <stratosk@xxxxxxxxxxxx> wrote:
> Ondemand calculates load in terms of frequency and increases it only
> if the load_freq is greater than up_threshold multiplied by current
> or average frequency. This seems to produce oscillations of frequency
> between min and max because, for example, a relatively small load can
> easily saturate minimum frequency and lead the CPU to max. Then, the
> CPU will decrease back to min due to a small load_freq.
>
> This patch changes the calculation method of load and target frequency
> considering 2 points:
> - Load computation should be independent from current or average
> measured frequency. For example an absolute load 80% at 100MHz is not
> necessarily equivalent to 8% at 1000MHz in the next sampling interval.
> - Target frequency should be increased to any value of frequency table
> proportional to absolute load, instead to only the max.
>
> The patch also removes the unused code as a result of the above changes.
>
> Tested on Intel i7-3770 CPU @ 3.40GHz and on Quad core 1500MHz Krait.
> Phoronix benchmark of Linux Kernel Compilation 3.1 test shows an
> increase ~1.5% in performance. cpufreq_stats (time_in_state) shows
> that middle frequencies are used more, with this patch. Highest
> and lowest frequencies were used less by ~9%
>
> Signed-off-by: Stratos Karafotis <stratosk@xxxxxxxxxxxx>
> ---
> arch/x86/include/asm/processor.h | 29 ----------------------
> drivers/cpufreq/Makefile | 2 +-
> drivers/cpufreq/acpi-cpufreq.c | 5 ----
> drivers/cpufreq/cpufreq.c | 21 ----------------
> drivers/cpufreq/cpufreq_governor.c | 10 +-------
> drivers/cpufreq/cpufreq_governor.h | 1 -
> drivers/cpufreq/cpufreq_ondemand.c | 39 ++++++-----------------------
> drivers/cpufreq/mperf.c | 51 --------------------------------------
> drivers/cpufreq/mperf.h | 9 -------
> include/linux/cpufreq.h | 6 -----
> 10 files changed, 9 insertions(+), 164 deletions(-)
> delete mode 100644 drivers/cpufreq/mperf.c
> delete mode 100644 drivers/cpufreq/mperf.h

I believe you should have removed other users of getavg() in a separate
patch and also cc'd relevant people so that you can some review comments
from them.

> diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
> index 22224b3..2874a3b 100644
> --- a/arch/x86/include/asm/processor.h
> +++ b/arch/x86/include/asm/processor.h
> @@ -941,35 +941,6 @@ extern int set_tsc_mode(unsigned int val);
>
> extern u16 amd_get_nb_id(int cpu);
>
> -struct aperfmperf {
> - u64 aperf, mperf;
> -};
> -
> -static inline void get_aperfmperf(struct aperfmperf *am)
> -{
> - WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_APERFMPERF));
> -
> - rdmsrl(MSR_IA32_APERF, am->aperf);
> - rdmsrl(MSR_IA32_MPERF, am->mperf);
> -}
> -
> -#define APERFMPERF_SHIFT 10
> -
> -static inline
> -unsigned long calc_aperfmperf_ratio(struct aperfmperf *old,
> - struct aperfmperf *new)
> -{
> - u64 aperf = new->aperf - old->aperf;
> - u64 mperf = new->mperf - old->mperf;
> - unsigned long ratio = aperf;
> -
> - mperf >>= APERFMPERF_SHIFT;
> - if (mperf)
> - ratio = div64_u64(aperf, mperf);
> -
> - return ratio;
> -}
> -
> extern unsigned long arch_align_stack(unsigned long sp);
> extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
>
> diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
> index 315b923..4519fb1 100644
> --- a/drivers/cpufreq/Makefile
> +++ b/drivers/cpufreq/Makefile
> @@ -23,7 +23,7 @@ obj-$(CONFIG_GENERIC_CPUFREQ_CPU0) += cpufreq-cpu0.o
> # powernow-k8 can load then. ACPI is preferred to all other hardware-specific drivers.
> # speedstep-* is preferred over p4-clockmod.
>
> -obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o mperf.o
> +obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o
> obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o
> obj-$(CONFIG_X86_PCC_CPUFREQ) += pcc-cpufreq.o
> obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o
> diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
> index 11b8b4b..0025cdd 100644
> --- a/drivers/cpufreq/acpi-cpufreq.c
> +++ b/drivers/cpufreq/acpi-cpufreq.c
> @@ -45,7 +45,6 @@
> #include <asm/msr.h>
> #include <asm/processor.h>
> #include <asm/cpufeature.h>
> -#include "mperf.h"
>
> MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
> MODULE_DESCRIPTION("ACPI Processor P-States Driver");
> @@ -842,10 +841,6 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
> /* notify BIOS that we exist */
> acpi_processor_notify_smm(THIS_MODULE);
>
> - /* Check for APERF/MPERF support in hardware */
> - if (boot_cpu_has(X86_FEATURE_APERFMPERF))
> - acpi_cpufreq_driver.getavg = cpufreq_get_measured_perf;
> -
> pr_debug("CPU%u - ACPI performance management activated.\n", cpu);
> for (i = 0; i < perf->state_count; i++)
> pr_debug(" %cP%d: %d MHz, %d mW, %d uS\n",
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> index 2d53f47..04ceddc 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c
> @@ -1502,27 +1502,6 @@ no_policy:
> }
> EXPORT_SYMBOL_GPL(cpufreq_driver_target);
>
> -int __cpufreq_driver_getavg(struct cpufreq_policy *policy, unsigned int cpu)
> -{
> - int ret = 0;
> -
> - if (cpufreq_disabled())
> - return ret;
> -
> - if (!cpufreq_driver->getavg)
> - return 0;
> -
> - policy = cpufreq_cpu_get(policy->cpu);
> - if (!policy)
> - return -EINVAL;
> -
> - ret = cpufreq_driver->getavg(policy, cpu);
> -
> - cpufreq_cpu_put(policy);
> - return ret;
> -}
> -EXPORT_SYMBOL_GPL(__cpufreq_driver_getavg);
> -
> /*
> * when "event" is CPUFREQ_GOV_LIMITS
> */
> diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
> index 5af40ad..eeab30a 100644
> --- a/drivers/cpufreq/cpufreq_governor.c
> +++ b/drivers/cpufreq/cpufreq_governor.c
> @@ -97,7 +97,7 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
>
> policy = cdbs->cur_policy;
>
> - /* Get Absolute Load (in terms of freq for ondemand gov) */
> + /* Get Absolute Load */
> for_each_cpu(j, policy->cpus) {
> struct cpu_dbs_common_info *j_cdbs;
> u64 cur_wall_time, cur_idle_time;
> @@ -148,14 +148,6 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
>
> load = 100 * (wall_time - idle_time) / wall_time;
>
> - if (dbs_data->cdata->governor == GOV_ONDEMAND) {
> - int freq_avg = __cpufreq_driver_getavg(policy, j);
> - if (freq_avg <= 0)
> - freq_avg = policy->cur;
> -
> - load *= freq_avg;
> - }
> -
> if (load > max_load)
> max_load = load;
> }
> diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
> index e16a961..e899c11 100644
> --- a/drivers/cpufreq/cpufreq_governor.h
> +++ b/drivers/cpufreq/cpufreq_governor.h
> @@ -169,7 +169,6 @@ struct od_dbs_tuners {
> unsigned int sampling_rate;
> unsigned int sampling_down_factor;
> unsigned int up_threshold;
> - unsigned int adj_up_threshold;
> unsigned int powersave_bias;
> unsigned int io_is_busy;
> };
> diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
> index 4b9bb5d..c1e6d3e 100644
> --- a/drivers/cpufreq/cpufreq_ondemand.c
> +++ b/drivers/cpufreq/cpufreq_ondemand.c
> @@ -29,11 +29,9 @@
> #include "cpufreq_governor.h"
>
> /* On-demand governor macros */
> -#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (10)
> #define DEF_FREQUENCY_UP_THRESHOLD (80)
> #define DEF_SAMPLING_DOWN_FACTOR (1)
> #define MAX_SAMPLING_DOWN_FACTOR (100000)
> -#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (3)
> #define MICRO_FREQUENCY_UP_THRESHOLD (95)
> #define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000)
> #define MIN_FREQUENCY_UP_THRESHOLD (11)
> @@ -159,14 +157,10 @@ static void dbs_freq_increase(struct cpufreq_policy *p, unsigned int freq)
>
> /*
> * Every sampling_rate, we check, if current idle time is less than 20%
> - * (default), then we try to increase frequency. Every sampling_rate, we look
> - * for the lowest frequency which can sustain the load while keeping idle time
> - * over 30%. If such a frequency exist, we try to decrease to this frequency.
> - *
> - * Any frequency increase takes it to the maximum frequency. Frequency reduction
> - * happens at minimum steps of 5% (default) of current frequency
> + * (default), then we try to increase frequency. Else, we adjust the frequency
> + * proportional to load.
> */
> -static void od_check_cpu(int cpu, unsigned int load_freq)
> +static void od_check_cpu(int cpu, unsigned int load)
> {
> struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
> struct cpufreq_policy *policy = dbs_info->cdbs.cur_policy;
> @@ -176,29 +170,17 @@ static void od_check_cpu(int cpu, unsigned int load_freq)
> dbs_info->freq_lo = 0;
>
> /* Check for frequency increase */
> - if (load_freq > od_tuners->up_threshold * policy->cur) {
> + if (load > od_tuners->up_threshold) {
> /* If switching to max speed, apply sampling_down_factor */
> if (policy->cur < policy->max)
> dbs_info->rate_mult =
> od_tuners->sampling_down_factor;
> dbs_freq_increase(policy, policy->max);
> return;
> - }
> -
> - /* Check for frequency decrease */
> - /* if we cannot reduce the frequency anymore, break out early */
> - if (policy->cur == policy->min)
> - return;
> -
> - /*
> - * The optimal frequency is the frequency that is the lowest that can
> - * support the current CPU usage without triggering the up policy. To be
> - * safe, we focus 10 points under the threshold.
> - */
> - if (load_freq < od_tuners->adj_up_threshold
> - * policy->cur) {
> + } else {
> + /* Calculate the next frequency proportional to load */
> unsigned int freq_next;
> - freq_next = load_freq / od_tuners->adj_up_threshold;
> + freq_next = load * policy->max / 100;
>
> /* No longer fully busy, reset rate_mult */
> dbs_info->rate_mult = 1;
> @@ -372,9 +354,6 @@ static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf,
> input < MIN_FREQUENCY_UP_THRESHOLD) {
> return -EINVAL;
> }
> - /* Calculate the new adj_up_threshold */
> - od_tuners->adj_up_threshold += input;
> - od_tuners->adj_up_threshold -= od_tuners->up_threshold;
>
> od_tuners->up_threshold = input;
> return count;
> @@ -523,8 +502,6 @@ static int od_init(struct dbs_data *dbs_data)
> if (idle_time != -1ULL) {
> /* Idle micro accounting is supported. Use finer thresholds */
> tuners->up_threshold = MICRO_FREQUENCY_UP_THRESHOLD;
> - tuners->adj_up_threshold = MICRO_FREQUENCY_UP_THRESHOLD -
> - MICRO_FREQUENCY_DOWN_DIFFERENTIAL;
> /*
> * In nohz/micro accounting case we set the minimum frequency
> * not depending on HZ, but fixed (very low). The deferred
> @@ -533,8 +510,6 @@ static int od_init(struct dbs_data *dbs_data)
> dbs_data->min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE;
> } else {
> tuners->up_threshold = DEF_FREQUENCY_UP_THRESHOLD;
> - tuners->adj_up_threshold = DEF_FREQUENCY_UP_THRESHOLD -
> - DEF_FREQUENCY_DOWN_DIFFERENTIAL;
>
> /* For correct statistics, we need 10 ticks for each measure */
> dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO *
> diff --git a/drivers/cpufreq/mperf.c b/drivers/cpufreq/mperf.c
> deleted file mode 100644
> index 911e193..0000000
> --- a/drivers/cpufreq/mperf.c
> +++ /dev/null
> @@ -1,51 +0,0 @@
> -#include <linux/kernel.h>
> -#include <linux/smp.h>
> -#include <linux/module.h>
> -#include <linux/init.h>
> -#include <linux/cpufreq.h>
> -#include <linux/slab.h>
> -
> -#include "mperf.h"
> -
> -static DEFINE_PER_CPU(struct aperfmperf, acfreq_old_perf);
> -
> -/* Called via smp_call_function_single(), on the target CPU */
> -static void read_measured_perf_ctrs(void *_cur)
> -{
> - struct aperfmperf *am = _cur;
> -
> - get_aperfmperf(am);
> -}
> -
> -/*
> - * Return the measured active (C0) frequency on this CPU since last call
> - * to this function.
> - * Input: cpu number
> - * Return: Average CPU frequency in terms of max frequency (zero on error)
> - *
> - * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance
> - * over a period of time, while CPU is in C0 state.
> - * IA32_MPERF counts at the rate of max advertised frequency
> - * IA32_APERF counts at the rate of actual CPU frequency
> - * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and
> - * no meaning should be associated with absolute values of these MSRs.
> - */
> -unsigned int cpufreq_get_measured_perf(struct cpufreq_policy *policy,
> - unsigned int cpu)
> -{
> - struct aperfmperf perf;
> - unsigned long ratio;
> - unsigned int retval;
> -
> - if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1))
> - return 0;
> -
> - ratio = calc_aperfmperf_ratio(&per_cpu(acfreq_old_perf, cpu), &perf);
> - per_cpu(acfreq_old_perf, cpu) = perf;
> -
> - retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT;
> -
> - return retval;
> -}
> -EXPORT_SYMBOL_GPL(cpufreq_get_measured_perf);
> -MODULE_LICENSE("GPL");
> diff --git a/drivers/cpufreq/mperf.h b/drivers/cpufreq/mperf.h
> deleted file mode 100644
> index 5dbf295..0000000
> --- a/drivers/cpufreq/mperf.h
> +++ /dev/null
> @@ -1,9 +0,0 @@
> -/*
> - * (c) 2010 Advanced Micro Devices, Inc.
> - * Your use of this code is subject to the terms and conditions of the
> - * GNU general public license version 2. See "COPYING" or
> - * http://www.gnu.org/licenses/gpl.html
> - */
> -
> -unsigned int cpufreq_get_measured_perf(struct cpufreq_policy *policy,
> - unsigned int cpu);
> diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
> index 037d36a..60fcb42 100644
> --- a/include/linux/cpufreq.h
> +++ b/include/linux/cpufreq.h
> @@ -211,10 +211,6 @@ extern int __cpufreq_driver_target(struct cpufreq_policy *policy,
> unsigned int target_freq,
> unsigned int relation);
>
> -
> -extern int __cpufreq_driver_getavg(struct cpufreq_policy *policy,
> - unsigned int cpu);
> -
> int cpufreq_register_governor(struct cpufreq_governor *governor);
> void cpufreq_unregister_governor(struct cpufreq_governor *governor);
>
> @@ -254,8 +250,6 @@ struct cpufreq_driver {
> unsigned int (*get) (unsigned int cpu);
>
> /* optional */
> - unsigned int (*getavg) (struct cpufreq_policy *policy,
> - unsigned int cpu);
> int (*bios_limit) (int cpu, unsigned int *limit);
>
> int (*exit) (struct cpufreq_policy *policy);
> --
> 1.8.1.4
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/