Re: [RFC PATCH 6/7] perf, x86: large PEBS interrupt threshold

From: Peter Zijlstra
Date: Wed May 28 2014 - 13:05:43 EST


On Wed, May 28, 2014 at 09:08:47AM -0700, Andi Kleen wrote:
> On Wed, May 28, 2014 at 05:35:48PM +0200, Peter Zijlstra wrote:
> > On Wed, May 28, 2014 at 07:58:31AM -0700, Andi Kleen wrote:

> > > When a PEBS counter overflows it clears its respective GLOBAL_STATUS bit
> > > automatically.
> >
> > That's an ambiguous statement; did you mean to say a PEBS enabled
> > counter will not raise its bit in GLOBAL_STATUS on counter overflow?
>
> Let's revisit how PEBS works:
>
> - The counter overflows and sets the GLOBAL_STATUS bit
> - The PEBS assist is armed
> - The counter triggers again
> - The PEBS assist fires and delivers a PEBS record
> - Finally it clears the GLOBAL_STATUS
> - When the threshold is reached it raises an PMI
>
> So the GLOBAL_STATUS bit is visible between the first overflow and the end
> of the PEBS record delivery.

OK, so that's something entirely different from what you initially said,
but it is what I thought it did -- you said that it clears on overflow,
but it clears after recording.

Lets denote the above steps as A-F and then consider the following
scenario (hmm, while drawing the states I ended up with something
different than I thought):

Counter0 Counter1

0A
0B

1A
1B

0C-E
0F

<PMI>
1C-E
1F


If we get the PMI (where denoted) we can actually reconstruct which
event triggered, by looking at which bit(s) flipped between the recorded
state and the current state (due to E coming before F)

Now, admittedly we don't actually do that, but the below patch
implements that (I think, its not been near a compiler).

Hmm,.. .oO I think we might be able to do something similar with
multi-events, look at the next records ->state, and use the current MSR
value when there's no next record.

Thoughts?

---
arch/x86/kernel/cpu/perf_event.h | 2 +-
arch/x86/kernel/cpu/perf_event_intel.c | 2 +-
arch/x86/kernel/cpu/perf_event_intel_ds.c | 21 +++++++--------------
3 files changed, 9 insertions(+), 16 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 3b2f9bdd974b..a71f6bd05f19 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -445,7 +445,7 @@ struct x86_pmu {
pebs_active :1,
pebs_broken :1;
int pebs_record_size;
- void (*drain_pebs)(struct pt_regs *regs);
+ void (*drain_pebs)(struct pt_regs *regs, u64 status);
struct event_constraint *pebs_constraints;
void (*pebs_aliases)(struct perf_event *event);
int max_pebs_events;
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index adb02aa62af5..3371310f200e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1386,7 +1386,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
*/
if (__test_and_clear_bit(62, (unsigned long *)&status)) {
handled++;
- x86_pmu.drain_pebs(regs);
+ x86_pmu.drain_pebs(regs, status);
}

/*
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 980970cb744d..0ad62addff7f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -953,7 +953,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
x86_pmu_stop(event, 0);
}

-static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
+static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, u64 status)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct debug_store *ds = cpuc->ds;
@@ -994,13 +994,12 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
__intel_pmu_pebs_event(event, iregs, at);
}

-static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
+static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, u64 status)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct debug_store *ds = cpuc->ds;
struct perf_event *event = NULL;
void *at, *top;
- u64 status = 0;
int bit;

if (!x86_pmu.pebs_active)
@@ -1024,28 +1023,22 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)

for (; at < top; at += x86_pmu.pebs_record_size) {
struct pebs_record_nhm *p = at;
+ u64 rec_status = (p->status ^ status) & (cpuc->pebs_enabled & 0xFFFFFFFF);

- for_each_set_bit(bit, (unsigned long *)&p->status,
+ for_each_set_bit(bit, (unsigned long *)&rec_status,
x86_pmu.max_pebs_events) {
event = cpuc->events[bit];
if (!test_bit(bit, cpuc->active_mask))
continue;

- WARN_ON_ONCE(!event);
-
- if (!event->attr.precise_ip)
+ if (WARN_ON_ONCE(!event))
continue;

- if (__test_and_set_bit(bit, (unsigned long *)&status))
+ if (!event->attr.precise_ip)
continue;

- break;
+ __intel_pmu_pebs_event(event, iregs, at);
}
-
- if (!event || bit >= x86_pmu.max_pebs_events)
- continue;
-
- __intel_pmu_pebs_event(event, iregs, at);
}
}

Attachment: pgph3Qcemzer4.pgp
Description: PGP signature