Re: Problem: Possible regression in intel_pstate on 3.12

From: Dirk Brandewie
Date: Mon Dec 16 2013 - 12:28:27 EST


Hi Joakim,

Add the following patch to your v3.12 kernel and collect some data with the
command and send the resulting perf.data file:
perf record -a -c 1 -e power:pstate_sample sleep 10


TIA
--Dirk

commit b3dc2c2a106cea68e4c9c0f4747b15291113c4ae
Author: Dirk Brandewie <dirk.j.brandewie@xxxxxxxxx>
Date: Mon Dec 2 09:56:46 2013 -0800

intel_pstate: Add trace point to report internal state.

Add perf trace event "power:pstate_sample" to report driver state to
aid in diagnosing issues reported against intel_pstate.

Signed-off-by: Dirk Brandewie <dirk.j.brandewie@xxxxxxxxx>
---
drivers/cpufreq/intel_pstate.c | 22 ++++++++++++++++++
include/trace/events/power.h | 53 ++++++++++++++++++++++++++++++++++++++++++
2 files changed, 75 insertions(+)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 5f1cbae..c4f14d1 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -50,6 +50,8 @@ static inline int32_t div_fp(int32_t x, int32_t y)
return div_s64((int64_t)x << FRAC_BITS, (int64_t)y);
}

+static u64 energy_divisor;
+
struct sample {
int32_t core_pct_busy;
u64 aperf;
@@ -512,6 +514,7 @@ static inline void intel_pstate_sample(struct cpudata *cpu)

rdmsrl(MSR_IA32_APERF, aperf);
rdmsrl(MSR_IA32_MPERF, mperf);
+
cpu->sample_ptr = (cpu->sample_ptr + 1) % SAMPLE_COUNT;
cpu->samples[cpu->sample_ptr].aperf = aperf;
cpu->samples[cpu->sample_ptr].mperf = mperf;
@@ -565,10 +568,24 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
static void intel_pstate_timer_func(unsigned long __data)
{
struct cpudata *cpu = (struct cpudata *) __data;
+ struct sample *sample;
+ u64 energy;

intel_pstate_sample(cpu);
+
+ sample = &cpu->samples[cpu->sample_ptr];
+ rdmsrl(MSR_PKG_ENERGY_STATUS, energy);
+
intel_pstate_adjust_busy_pstate(cpu);

+ trace_pstate_sample(fp_toint(sample->core_pct_busy),
+ fp_toint(intel_pstate_get_scaled_busy(cpu)),
+ cpu->pstate.current_pstate,
+ sample->mperf,
+ sample->aperf,
+ energy/energy_divisor,
+ sample->freq);
+
if (cpu->pstate.current_pstate == cpu->pstate.min_pstate) {
cpu->min_pstate_count++;
if (!(cpu->min_pstate_count % 5)) {
@@ -849,6 +866,7 @@ static int __init intel_pstate_init(void)
int cpu, rc = 0;
const struct x86_cpu_id *id;
struct cpu_defaults *cpu_info;
+ u64 units;

if (no_load)
return -ENODEV;
@@ -882,8 +900,12 @@ static int __init intel_pstate_init(void)
if (rc)
goto out;

+ rdmsrl(MSR_RAPL_POWER_UNIT, units);
+ energy_divisor = 1 << ((units >> 8) & 0x1f); /* bits{12:8} */
+
intel_pstate_debug_expose_params();
intel_pstate_sysfs_expose_params();
+
return rc;
out:
get_online_cpus();
diff --git a/include/trace/events/power.h b/include/trace/events/power.h
index cda100d..9e9475c 100644
--- a/include/trace/events/power.h
+++ b/include/trace/events/power.h
@@ -35,6 +35,59 @@ DEFINE_EVENT(cpu, cpu_idle,
TP_ARGS(state, cpu_id)
);

+TRACE_EVENT(pstate_sample,
+
+ TP_PROTO(u32 core_busy,
+ u32 scaled_busy,
+ u32 state,
+ u64 mperf,
+ u64 aperf,
+ u32 energy,
+ u32 freq
+ ),
+
+ TP_ARGS(core_busy,
+ scaled_busy,
+ state,
+ mperf,
+ aperf,
+ energy,
+ freq
+ ),
+
+ TP_STRUCT__entry(
+ __field(u32, core_busy)
+ __field(u32, scaled_busy)
+ __field(u32, state)
+ __field(u64, mperf)
+ __field(u64, aperf)
+ __field(u32, energy)
+ __field(u32, freq)
+
+ ),
+
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/