Re: [PATCH] perf, x86: Fix Intel shared extra MSR allocation

From: Stephane Eranian
Date: Wed Jun 06 2012 - 06:35:12 EST


Ok, I found the problem. It was in intel_fixup_er().
Unlike in the original code, this routine must update
the event->extra_reg.idx to the idx parameter instead
of trying to swap out from it.

I will repost an updated version (v2).


On Tue, Jun 5, 2012 at 11:35 PM, Stephane Eranian <eranian@xxxxxxxxxx> wrote:
>
> Zheng Yan reported that event group validation can wreck event state
> when Intel extra_reg allocation changes event state.
>
> Validation shouldn't change any persistent state. Cloning events in
> validate_{event,group}() isn't really pretty either, so add a few
> special cases to avoid modifying the event state.
>
> The code is restructured to minimize the special case impact.
>
> Reported-by: Zheng Yan <zheng.z.yan@xxxxxxxxxxxxxxx>
> Cc: Stephane Eranian <eranian@xxxxxxxxxx>
> Signed-off-by: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
> Acked-by: Stephane Eranian <eranian@xxxxxxxxxx>
> ---
>
> diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
> index e049d6d..cb60838 100644
> --- a/arch/x86/kernel/cpu/perf_event.c
> +++ b/arch/x86/kernel/cpu/perf_event.c
> @@ -1496,6 +1496,7 @@ static struct cpu_hw_events *allocate_fake_cpuc(void)
> Â Â Â Â Â Â Â Âif (!cpuc->shared_regs)
> Â Â Â Â Â Â Â Â Â Â Â Âgoto error;
> Â Â Â Â}
> + Â Â Â cpuc->is_fake = 1;
> Â Â Â Âreturn cpuc;
> Âerror:
> Â Â Â Âfree_fake_cpuc(cpuc);
> diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
> index 6638aaf..83794d8 100644
> --- a/arch/x86/kernel/cpu/perf_event.h
> +++ b/arch/x86/kernel/cpu/perf_event.h
> @@ -117,6 +117,7 @@ struct cpu_hw_events {
>    Âstruct perf_event    *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
>
>    Âunsigned int      Âgroup_flag;
> +    int           is_fake;
>
> Â Â Â Â/*
> Â Â Â Â * Intel DebugStore bits
> diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
> index 166546e..76a2bd2 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel.c
> @@ -1119,27 +1119,33 @@ intel_bts_constraints(struct perf_event *event)
> Â Â Â Âreturn NULL;
> Â}
>
> -static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
> +static int intel_alt_er(int idx)
> Â{
> Â Â Â Âif (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
> - Â Â Â Â Â Â Â return false;
> + Â Â Â Â Â Â Â return idx;
>
> - Â Â Â if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
> + Â Â Â if (idx == EXTRA_REG_RSP_0)
> + Â Â Â Â Â Â Â return EXTRA_REG_RSP_1;
> +
> + Â Â Â if (idx == EXTRA_REG_RSP_1)
> + Â Â Â Â Â Â Â return EXTRA_REG_RSP_0;
> +
> + Â Â Â return idx;
> +}
> +
> +static void intel_fixup_er(struct perf_event *event, int idx)
> +{
> + Â Â Â if (idx == EXTRA_REG_RSP_0) {
> Â Â Â Â Â Â Â Âevent->hw.config &= ~INTEL_ARCH_EVENT_MASK;
> Â Â Â Â Â Â Â Âevent->hw.config |= 0x01bb;
> Â Â Â Â Â Â Â Âevent->hw.extra_reg.idx = EXTRA_REG_RSP_1;
> Â Â Â Â Â Â Â Âevent->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
> - Â Â Â } else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) {
> + Â Â Â } else if (idx == EXTRA_REG_RSP_1) {
> Â Â Â Â Â Â Â Âevent->hw.config &= ~INTEL_ARCH_EVENT_MASK;
> Â Â Â Â Â Â Â Âevent->hw.config |= 0x01b7;
> Â Â Â Â Â Â Â Âevent->hw.extra_reg.idx = EXTRA_REG_RSP_0;
> Â Â Â Â Â Â Â Âevent->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
> Â Â Â Â}
> -
> - Â Â Â if (event->hw.extra_reg.idx == orig_idx)
> - Â Â Â Â Â Â Â return false;
> -
> - Â Â Â return true;
> Â}
>
> Â/*
> @@ -1157,14 +1163,19 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
> Â Â Â Âstruct event_constraint *c = &emptyconstraint;
> Â Â Â Âstruct er_account *era;
> Â Â Â Âunsigned long flags;
> - Â Â Â int orig_idx = reg->idx;
> + Â Â Â int idx = reg->idx;
>
> - Â Â Â /* already allocated shared msr */
> - Â Â Â if (reg->alloc)
> - Â Â Â Â Â Â Â return NULL; /* call x86_get_event_constraint() */
> + Â Â Â /*
> + Â Â Â Â* reg->alloc can be set due to existing state, so for fake cpuc
> + Â Â Â Â* we need to ignore this, otherwise we might fail to allocate
> + Â Â Â Â* proper fake state for this extra reg constraint. Also see
> + Â Â Â Â* the comment below.
> + Â Â Â */
> + Â Â Â if (reg->alloc && !cpuc->is_fake)
> + Â Â Â Â Â Â Â return NULL; /* call x86_get_event_constraints() */
>
> Âagain:
> - Â Â Â era = &cpuc->shared_regs->regs[reg->idx];
> + Â Â Â era = &cpuc->shared_regs->regs[idx];
> Â Â Â Â/*
> Â Â Â Â * we use spin_lock_irqsave() to avoid lockdep issues when
> Â Â Â Â * passing a fake cpuc
> @@ -1172,6 +1183,27 @@ again:
> Â Â Â Âraw_spin_lock_irqsave(&era->lock, flags);
>
> Â Â Â Âif (!atomic_read(&era->ref) || era->config == reg->config) {
> + Â Â Â Â Â Â Â /*
> + Â Â Â Â Â Â Â Â* If its a fake cpuc -- as per validate_{group,event}() we
> + Â Â Â Â Â Â Â Â* shouldn't touch event state and we can avoid doing so
> + Â Â Â Â Â Â Â Â* since both will only call get_event_constraints() once
> + Â Â Â Â Â Â Â Â* on each event, this avoids the need for reg->alloc.
> + Â Â Â Â Â Â Â Â*
> + Â Â Â Â Â Â Â Â* Not doing the ER fixup will only result in era->reg being
> + Â Â Â Â Â Â Â Â* wrong, but since we won't actually try and program hardware
> + Â Â Â Â Â Â Â Â* this isn't a problem either.
> + Â Â Â Â Â Â Â Â*/
> + Â Â Â Â Â Â Â if (!cpuc->is_fake) {
> + Â Â Â Â Â Â Â Â Â Â Â if (idx != reg->idx)
> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â intel_fixup_er(event, idx);
> + Â Â Â Â Â Â Â Â Â Â Â /*
> + Â Â Â Â Â Â Â Â Â Â Â Â* x86_schedule_events() calls get_event_constraints()
> + Â Â Â Â Â Â Â Â Â Â Â Â* multiple times on events in the case of incremental
> + Â Â Â Â Â Â Â Â Â Â Â Â* scheduling(). reg->alloc ensures we only do the ER
> + Â Â Â Â Â Â Â Â Â Â Â Â* allocation once.
> + Â Â Â Â Â Â Â Â Â Â Â Â*/
> + Â Â Â Â Â Â Â Â Â Â Â reg->alloc = 1;
> + Â Â Â Â Â Â Â }
>
> Â Â Â Â Â Â Â Â/* lock in msr value */
> Â Â Â Â Â Â Â Âera->config = reg->config;
> @@ -1180,18 +1212,19 @@ again:
> Â Â Â Â Â Â Â Â/* one more user */
> Â Â Â Â Â Â Â Âatomic_inc(&era->ref);
>
> - Â Â Â Â Â Â Â /* no need to reallocate during incremental event scheduling */
> - Â Â Â Â Â Â Â reg->alloc = 1;
> -
> Â Â Â Â Â Â Â Â/*
> Â Â Â Â Â Â Â Â * need to call x86_get_event_constraint()
> Â Â Â Â Â Â Â Â * to check if associated event has constraints
> Â Â Â Â Â Â Â Â */
> Â Â Â Â Â Â Â Âc = NULL;
> - Â Â Â } else if (intel_try_alt_er(event, orig_idx)) {
> - Â Â Â Â Â Â Â raw_spin_unlock_irqrestore(&era->lock, flags);
> - Â Â Â Â Â Â Â goto again;
> + Â Â Â } else {
> + Â Â Â Â Â Â Â idx = intel_alt_er(idx);
> + Â Â Â Â Â Â Â if (idx != reg->idx) {
> + Â Â Â Â Â Â Â Â Â Â Â raw_spin_unlock_irqrestore(&era->lock, flags);
> + Â Â Â Â Â Â Â Â Â Â Â goto again;
> + Â Â Â Â Â Â Â }
> Â Â Â Â}
> +
> Â Â Â Âraw_spin_unlock_irqrestore(&era->lock, flags);
>
> Â Â Â Âreturn c;
> @@ -1204,11 +1237,14 @@ __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
> Â Â Â Âstruct er_account *era;
>
> Â Â Â Â/*
> - Â Â Â Â* only put constraint if extra reg was actually
> - Â Â Â Â* allocated. Also takes care of event which do
> - Â Â Â Â* not use an extra shared reg
> + Â Â Â Â* Only put constraint if extra reg was actually allocated. Also takes
> + Â Â Â Â* care of event which do not use an extra shared reg.
> + Â Â Â Â*
> + Â Â Â Â* Also, if this is a fake cpuc we shouldn't touch any event state
> + Â Â Â Â* (reg->alloc) and we don't care about leaving inconsistent cpuc state
> + Â Â Â Â* either since it'll be thrown out.
> Â Â Â Â */
> - Â Â Â if (!reg->alloc)
> + Â Â Â if (!reg->alloc || cpuc->is_fake)
> Â Â Â Â Â Â Â Âreturn;
>
> Â Â Â Âera = &cpuc->shared_regs->regs[reg->idx];
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/