[PATCH 4/4] perf/x86/intel: Throttle PEBS events only from pmi

From: Jiri Olsa
Date: Wed Dec 28 2016 - 08:31:33 EST


This patch fixes the following WARNING:

WARNING: CPU: 15 PID: 15768 at arch/x86/events/core.c:1256 x86_pmu_start+0x1b3/0x1c0
...
Call Trace:
<IRQ>
dump_stack+0x86/0xc3
__warn+0xcb/0xf0
warn_slowpath_null+0x1d/0x20
x86_pmu_start+0x1b3/0x1c0
perf_event_task_tick+0x342/0x3f0
scheduler_tick+0x75/0xd0
update_process_times+0x47/0x60
tick_sched_handle.isra.19+0x25/0x60
tick_sched_timer+0x3d/0x70
__hrtimer_run_queues+0xfb/0x510
hrtimer_interrupt+0x9d/0x1a0
local_apic_timer_interrupt+0x38/0x60
smp_trace_apic_timer_interrupt+0x56/0x25a
trace_apic_timer_interrupt+0x9d/0xb0
...

which happens, AFAICS, under the following conditions
(with PEBS events configured):

  - x86_pmu_enable reconfigures the counters and calls:
    - x86_pmu_stop on the PEBS event
      - x86_pmu_stop drains the PEBS buffer, crosses
        the throttle limit and sets:
          event->hw.interrupts = MAX_INTERRUPTS
    - the following x86_pmu_start call starts the event
  - perf_event_task_tick is triggered
    - perf_adjust_freq_unthr_context sees the event with
      MAX_INTERRUPTS set and calls x86_pmu_start on the already
      started event, which triggers the warning

My first attempt to fix this was to unthrottle the event
before starting it in x86_pmu_enable. But I think that
omitting the throttling completely when we are not in the
PMI is better.

Signed-off-by: Jiri Olsa <jolsa@xxxxxxxxxx>
---
arch/x86/events/intel/core.c | 2 +-
arch/x86/events/intel/ds.c | 17 +++++++++--------
arch/x86/events/perf_event.h | 2 +-
3 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 86138267b68a..16332867d435 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2137,7 +2137,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
*/
if (__test_and_clear_bit(62, (unsigned long *)&status)) {
handled++;
- x86_pmu.drain_pebs(regs);
+ x86_pmu.drain_pebs(regs, 1);
status &= x86_pmu.intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI;
}

diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 9dfeeeca0ea8..75ba3cb06012 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -603,7 +603,7 @@ static inline void intel_pmu_drain_pebs_buffer(void)
{
struct pt_regs regs;

- x86_pmu.drain_pebs(&regs);
+ x86_pmu.drain_pebs(&regs, 0);
}

void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
@@ -1233,7 +1233,7 @@ get_next_pebs_record_by_bit(void *base, void *top, int bit)
static void __intel_pmu_pebs_event(struct perf_event *event,
struct pt_regs *iregs,
void *base, void *top,
- int bit, int count)
+ int bit, int count, int pmi)
{
struct perf_sample_data data;
struct pt_regs regs;
@@ -1257,14 +1257,14 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
* All but the last records are processed.
* The last one is left to be able to call the overflow handler.
*/
- if (perf_event_overflow(event, &data, &regs)) {
+ if (perf_event_overflow_throttle(event, pmi, &data, &regs)) {
x86_pmu_stop(event, 0);
return;
}

}

-static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
+static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, int pmi)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct debug_store *ds = cpuc->ds;
@@ -1295,10 +1295,10 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
if (n <= 0)
return;

- __intel_pmu_pebs_event(event, iregs, at, top, 0, n);
+ __intel_pmu_pebs_event(event, iregs, at, top, 0, n, pmi);
}

-static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
+static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, int pmi)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct debug_store *ds = cpuc->ds;
@@ -1392,13 +1392,14 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
if (error[bit]) {
perf_log_lost_samples(event, error[bit]);

- if (perf_event_account_interrupt(event))
+ if (pmi && perf_event_account_interrupt(event))
x86_pmu_stop(event, 0);
}

if (counts[bit]) {
__intel_pmu_pebs_event(event, iregs, base,
- top, bit, counts[bit]);
+ top, bit, counts[bit],
+ pmi);
}
}
}
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index bcbb1d2ae10b..cdcfdd06b2d2 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -590,7 +590,7 @@ struct x86_pmu {
pebs_prec_dist :1;
int pebs_record_size;
int pebs_buffer_size;
- void (*drain_pebs)(struct pt_regs *regs);
+ void (*drain_pebs)(struct pt_regs *regs, int pmi);
struct event_constraint *pebs_constraints;
void (*pebs_aliases)(struct perf_event *event);
int max_pebs_events;
--
2.7.4