Re: [PATCH V7 3/6] perf, x86: handle multiple records in PEBS buffer

From: Peter Zijlstra
Date: Tue May 05 2015 - 09:17:04 EST


On Mon, Apr 20, 2015 at 04:07:47AM -0400, Kan Liang wrote:
> +static inline void *
> +get_next_pebs_record_by_bit(void *base, void *top, int bit)
> +{
> + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
> + void *at;
> + u64 pebs_status;
> +
> + if (base == NULL)
> + return NULL;
> +
> + for (at = base; at < top; at += x86_pmu.pebs_record_size) {
> + struct pebs_record_nhm *p = at;
> +
> + if (test_bit(bit, (unsigned long *)&p->status)) {

Just wondering, is that BT better than: p->status & (1 << bit) ?

> +
> + if (p->status == (1 << bit))
> + return at;
> +
> + /* clear non-PEBS bit and re-check */
> + pebs_status = p->status & cpuc->pebs_enabled;
> + pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1;
> + if (pebs_status == (1 << bit))
> + return at;
> + }
> + }
> + return NULL;
> +}
> +
> static void __intel_pmu_pebs_event(struct perf_event *event,
> + struct pt_regs *iregs,
> + void *base, void *top,
> + int bit, int count)
> {
> struct perf_sample_data data;
> struct pt_regs regs;
> + int i;
> + void *at = get_next_pebs_record_by_bit(base, top, bit);
>
> + if (!intel_pmu_save_and_restart(event) &&
> + !(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD))
> return;
>
> + if (count > 1) {
> + for (i = 0; i < count - 1; i++) {
> + setup_pebs_sample_data(event, iregs, at, &data, &regs);
> + perf_event_output(event, &data, &regs);
> + at += x86_pmu.pebs_record_size;
> + at = get_next_pebs_record_by_bit(at, top, bit);
> + }
> + }
> +
> + setup_pebs_sample_data(event, iregs, at, &data, &regs);
>
> + /* all records are processed, handle event overflow now */

All but the last. One record is explicitly left over so that the overflow
handler can be called for it, is it not?

> + if (perf_event_overflow(event, &data, &regs)) {
> x86_pmu_stop(event, 0);
> + return;
> + }
> +
> }
>
> static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
> @@ -1000,72 +1081,86 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
> if (!event->attr.precise_ip)
> return;
>
> + n = (top - at) / x86_pmu.pebs_record_size;
> if (n <= 0)
> return;
>
> + __intel_pmu_pebs_event(event, iregs, at,
> + top, 0, n);
> }
>
> static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
> {
> struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
> struct debug_store *ds = cpuc->ds;
> + struct perf_event *event;
> + void *base, *at, *top;
> int bit;
> + int counts[MAX_PEBS_EVENTS] = {};
>
> if (!x86_pmu.pebs_active)
> return;
>
> + base = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
> top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
>
> ds->pebs_index = ds->pebs_buffer_base;
>
> + if (unlikely(base >= top))
> return;
>
> + for (at = base; at < top; at += x86_pmu.pebs_record_size) {
> struct pebs_record_nhm *p = at;
>
> for_each_set_bit(bit, (unsigned long *)&p->status,
> x86_pmu.max_pebs_events) {
> event = cpuc->events[bit];
> WARN_ON_ONCE(!event);
>
> + if (event->attr.precise_ip)
> + break;
> + }

Would it make sense to delay looking for the event until you've found
there is a single bit set -- and already know which bit that is?

>
> + if (bit >= x86_pmu.max_pebs_events)
> + continue;
> + if (!test_bit(bit, cpuc->active_mask))
> + continue;
> + /*
> + * The PEBS hardware does not deal well with the situation
> + * when events happen near to each other and multiple bits
> + * are set. But it should happen rarely.
> + *
> + * If these events include one PEBS and multiple non-PEBS
> + * events, it doesn't impact PEBS record. The record will
> + * be handled normally. (slow path)
> + *
> + * If these events include two or more PEBS events, the
> + * records for the events can be collapsed into a single
> + * one, and it's not possible to reconstruct all events
> + * that caused the PEBS record. It's called collision.
> + * If collision happened, the record will be dropped.
> + *
> + */
> + if (p->status != (1 << bit)) {
> + u64 pebs_status;
> +
> + /* slow path */
> + pebs_status = p->status & cpuc->pebs_enabled;
> + pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1;
> + if (pebs_status != (1 << bit)) {
> + perf_log_lost(event);

Does it make sense to keep an error[bit] count and only log once, with the
actual number of lost records in it? -- when it is !0, obviously.

> continue;
> + }
> }
> + counts[bit]++;
> + }
>
> + for (bit = 0; bit < x86_pmu.max_pebs_events; bit++) {
> + if (counts[bit] == 0)
> continue;
> + event = cpuc->events[bit];
> + __intel_pmu_pebs_event(event, iregs, base,
> + top, bit, counts[bit]);
> }
> }
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/