Re: [PATCH 11/12] perf, x86: implement AMD IBS event configuration

From: Stephane Eranian
Date: Mon Apr 19 2010 - 09:46:41 EST


Comments below:

On Tue, Apr 13, 2010 at 10:23 PM, Robert Richter <robert.richter@xxxxxxx> wrote:
> This patch implements IBS for perf_event. It extends the AMD pmu to
> handle model specific IBS events.
>
> With the attr.model_spec bit set we can set up hardware events other
> than generic performance counters. A special PMU 64 bit config value
> can be passed through the perf_event interface. The concept of PMU
> model-specific arguments was already used in Perfmon2. The type of
> event (8 bits) is determined from the config value too; bits 32-39
> are reserved for this.
>
> There are two types of IBS events, for instruction fetch (IBS_FETCH)
> and for instruction execution (IBS_OP). Both events are implemented as
> special x86 events. The event allocation is implemented by using
> special event constraints for IBS. This way, the generic event
> scheduler can be used to allocate IBS events.
>
> IBS can only be set up with raw (model specific) config values and raw
> data samples. The event attributes for the syscall should be
> programmed like this (IBS_FETCH):
>
> 	memset(&attr, 0, sizeof(attr));
> 	attr.type        = PERF_TYPE_RAW;
> 	attr.sample_type = PERF_SAMPLE_CPU | PERF_SAMPLE_RAW;
> 	attr.config      = IBS_FETCH_CONFIG_DEFAULT;
> 	attr.config     |=
> 		((unsigned long long)MODEL_SPEC_TYPE_IBS_FETCH << 32)
> 		& MODEL_SPEC_TYPE_MASK;
> 	attr.model_spec  = 1;
>
> The whole IBS example will be part of libpfm4.
>
> The next patch implements the IBS interrupt handler.
>
> Cc: Stephane Eranian <eranian@xxxxxxxxxx>
> Signed-off-by: Robert Richter <robert.richter@xxxxxxx>
> ---
>  arch/x86/include/asm/perf_event.h    |    4 +-
>  arch/x86/kernel/cpu/perf_event.c     |   20 ++++
>  arch/x86/kernel/cpu/perf_event_amd.c |  161 ++++++++++++++++++++++++++++++++-
>  3 files changed, 179 insertions(+), 6 deletions(-)
>
> diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
> index 9f10215..fd5c326 100644
> --- a/arch/x86/include/asm/perf_event.h
> +++ b/arch/x86/include/asm/perf_event.h
> @@ -102,13 +102,15 @@ union cpuid10_edx {
>  #define X86_PMC_IDX_FIXED_BUS_CYCLES		(X86_PMC_IDX_FIXED + 2)
>
>  /*
> - * We model BTS tracing as another fixed-mode PMC.
> + * Masks for special PMU features
>  *
>  * We choose a value in the middle of the fixed event range, since lower
>  * values are used by actual fixed events and higher values are used
>  * to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr.
>  */
>  #define X86_PMC_IDX_SPECIAL_BTS		(X86_PMC_IDX_SPECIAL + 0)
> +#define X86_PMC_IDX_SPECIAL_IBS_FETCH		(X86_PMC_IDX_SPECIAL + 1)
> +#define X86_PMC_IDX_SPECIAL_IBS_OP		(X86_PMC_IDX_SPECIAL + 2)
>
>  /* IbsFetchCtl bits/masks */
>  #define IBS_FETCH_RAND_EN		(1ULL<<57)
> diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
> index 8f9674f..e2328f4 100644
> --- a/arch/x86/kernel/cpu/perf_event.c
> +++ b/arch/x86/kernel/cpu/perf_event.c
> @@ -184,6 +184,26 @@ union perf_capabilities {
>  };
>
>  /*
> + * Model specific hardware events
> + *
> + * With the attr.model_spec bit set we can set up hardware events
> + * other than generic performance counters. A special PMU 64 bit
> + * config value can be passed through the perf_event interface. The
> + * concept of PMU model-specific arguments was already used in
> + * Perfmon2. The type of event (8 bits) is determined from the config
> + * value too; bits 32-39 are reserved for this.
> + */
> +#define MODEL_SPEC_TYPE_IBS_FETCH	0
> +#define MODEL_SPEC_TYPE_IBS_OP		1
> +
> +#define MODEL_SPEC_TYPE_MASK		(0xFFULL << 32)
> +
> +static inline int get_model_spec_type(u64 config)
> +{
> +	return (config & MODEL_SPEC_TYPE_MASK) >> 32;
> +}
> +
> +/*
>  * struct x86_pmu - generic x86 pmu
>  */
>  struct x86_pmu {
> diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
> index 27daead..3dc327c 100644
> --- a/arch/x86/kernel/cpu/perf_event_amd.c
> +++ b/arch/x86/kernel/cpu/perf_event_amd.c
> @@ -2,6 +2,10 @@
>
>  #include <linux/pci.h>
>
> +#define IBS_FETCH_CONFIG_MASK	(IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
> +#define IBS_OP_CONFIG_MASK	(IBS_OP_CNT_CTL | IBS_OP_MAX_CNT)
> +#define AMD64_NUM_COUNTERS	4
> +
>  static DEFINE_RAW_SPINLOCK(amd_nb_lock);
>
>  static __initconst const u64 amd_hw_cache_event_ids
> @@ -193,6 +197,118 @@ static void release_ibs_hardware(void)
>  	clear_ibs_nmi();
>  }
>
> +static inline void amd_pmu_disable_ibs(void)
> +{
> +	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
> +	u64 val;
> +
> +	if (test_bit(X86_PMC_IDX_SPECIAL_IBS_FETCH, cpuc->active_mask)) {
> +		rdmsrl(MSR_AMD64_IBSFETCHCTL, val);
> +		val &= ~IBS_FETCH_ENABLE;
> +		wrmsrl(MSR_AMD64_IBSFETCHCTL, val);
> +	}
> +	if (test_bit(X86_PMC_IDX_SPECIAL_IBS_OP, cpuc->active_mask)) {
> +		rdmsrl(MSR_AMD64_IBSOPCTL, val);
> +		val &= ~IBS_OP_ENABLE;
> +		wrmsrl(MSR_AMD64_IBSOPCTL, val);
> +	}
> +}
> +
> +static inline void amd_pmu_enable_ibs(void)
> +{
> +	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
> +	u64 val;
> +
> +	if (test_bit(X86_PMC_IDX_SPECIAL_IBS_FETCH, cpuc->active_mask)) {
> +		rdmsrl(MSR_AMD64_IBSFETCHCTL, val);
> +		val |= IBS_FETCH_ENABLE;
> +		wrmsrl(MSR_AMD64_IBSFETCHCTL, val);
> +	}
> +	if (test_bit(X86_PMC_IDX_SPECIAL_IBS_OP, cpuc->active_mask)) {
> +		rdmsrl(MSR_AMD64_IBSOPCTL, val);
> +		val |= IBS_OP_ENABLE;
> +		wrmsrl(MSR_AMD64_IBSOPCTL, val);
> +	}
> +}

Aren't you picking up stale state by using read-modify-write here?
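If something else touched IBSFETCHCTL or IBSOPCTL in between, the |= here
re-enables with whatever bits were left in the register. A minimal sketch of
the alternative for the fetch case (assuming the saved per-event config is
reachable through cpuc->events[], as it is for the generic counters):

	if (test_bit(X86_PMC_IDX_SPECIAL_IBS_FETCH, cpuc->active_mask)) {
		struct perf_event *event =
			cpuc->events[X86_PMC_IDX_SPECIAL_IBS_FETCH];
		/* rewrite the full control register from the saved config */
		wrmsrl(MSR_AMD64_IBSFETCHCTL,
		       event->hw.config | IBS_FETCH_ENABLE);
	}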

> +
> +static int amd_pmu_ibs_config(struct perf_event *event)
> +{
> +	int type;
> +
> +	if (!x86_pmu.ibs)
> +		return -ENODEV;
> +
> +	if (event->hw.sample_period)
> +		/*
> +		 * Using the sample period attribute to calculate the
> +		 * IBS max count value is not yet supported; the max
> +		 * count must be in the raw config value.
> +		 */
> +		return -ENOSYS;
> +
> +
What is the problem with directly using the period here, rejecting
any value that is out of range or has the bottom 4 bits set?
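
A sketch of what I mean, inside amd_pmu_ibs_config() (just a sketch; the
granularity of 16 and the IBS_FETCH_MAX_CNT field mask are my assumptions
about the MaxCnt encoding, and the OP case would be analogous):

	if (event->attr.sample_period) {
		if (event->attr.sample_period & 0xf)
			return -EINVAL;	/* hardware counts in units of 16 */
		if ((event->attr.sample_period >> 4) & ~IBS_FETCH_MAX_CNT)
			return -EINVAL;	/* period out of range */
		event->hw.config |= event->attr.sample_period >> 4;
	}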

> +	if (event->attr.type != PERF_TYPE_RAW)
> +		/* only raw sample types are supported */
> +		return -EINVAL;
> +
> +	type = get_model_spec_type(event->attr.config);
> +	switch (type) {
> +	case MODEL_SPEC_TYPE_IBS_FETCH:
> +		event->hw.config = IBS_FETCH_CONFIG_MASK & event->attr.config;
> +		event->hw.idx = X86_PMC_IDX_SPECIAL_IBS_FETCH;
> +		/*
> +		 * Dirty hack, needed for __x86_pmu_enable_event(); it
> +		 * would be better to change event->hw.config_base into
> +		 * event->hw.config_msr that already includes the index.
> +		 */
> +		event->hw.config_base = MSR_AMD64_IBSFETCHCTL - event->hw.idx;
> +		break;
> +	case MODEL_SPEC_TYPE_IBS_OP:
> +		event->hw.config = IBS_OP_CONFIG_MASK & event->attr.config;
> +		event->hw.idx = X86_PMC_IDX_SPECIAL_IBS_OP;
> +		event->hw.config_base = MSR_AMD64_IBSOPCTL - event->hw.idx;
> +		break;

IBSOP.cnt_ctl is only available from RevC on; you need to check for it
and reject the event on older revisions.
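
A sketch of such a check (my assumption is that the IBS capability leaf,
CPUID 0x8000001b, reports op counting support in EAX bit 4; please verify
against the BKDG):

	case MODEL_SPEC_TYPE_IBS_OP:
		if ((event->attr.config & IBS_OP_CNT_CTL) &&
		    !(cpuid_eax(0x8000001b) & (1 << 4)))
			return -EINVAL;	/* IbsOpCntCtl not supported */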


> +	default:
> +		return -ENODEV;
> +	}
> +
> +	return 0;
> +}
> +
> +static inline void __amd_pmu_enable_ibs_event(struct hw_perf_event *hwc)
> +{
> +	if (hwc->idx == X86_PMC_IDX_SPECIAL_IBS_FETCH)
> +		__x86_pmu_enable_event(hwc, IBS_FETCH_ENABLE);
> +	else if (hwc->idx == X86_PMC_IDX_SPECIAL_IBS_OP)
> +		__x86_pmu_enable_event(hwc, IBS_OP_ENABLE);
> +}
> +
> +static void amd_pmu_disable_all(void)
> +{
> +	x86_pmu_disable_all();
> +	amd_pmu_disable_ibs();
> +}
> +
> +static void amd_pmu_enable_all(int added)
> +{
> +	x86_pmu_enable_all(added);
> +	amd_pmu_enable_ibs();
> +}
> +
> +static void amd_pmu_enable_event(struct perf_event *event)
> +{
> +	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
> +	struct hw_perf_event *hwc = &event->hw;
> +
> +	if (!cpuc->enabled)
> +		return;
> +
> +	if (hwc->idx < X86_PMC_IDX_SPECIAL)
> +		__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
> +	else
> +		__amd_pmu_enable_ibs_event(hwc);
> +}
> +
>  static u64 amd_pmu_event_map(int hw_event)
>  {
>  	return amd_perfmon_event_map[hw_event];
> @@ -200,7 +316,12 @@ static u64 amd_pmu_event_map(int hw_event)
>
>  static int amd_pmu_hw_config(struct perf_event *event)
>  {
> -	int ret = x86_pmu_hw_config(event);
> +	int ret;
> +
> +	if (event->attr.model_spec)
> +		return amd_pmu_ibs_config(event);
> +
> +	ret = x86_pmu_hw_config(event);
>
>  	if (ret)
>  		return ret;
> @@ -214,6 +335,23 @@ static int amd_pmu_hw_config(struct perf_event *event)
>  }
>
>  /*
> + * AMD64 events - list of special events (IBS)
> + */
> +static struct event_constraint amd_event_constraints[] =
> +{
> +	/*
> +	 * The value for the weight of these constraints is higher
> +	 * than in the unconstrained case to process IBS after the
> +	 * generic counters in x86_schedule_events().
> +	 */
> +	__EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_SPECIAL_IBS_FETCH, 0,
> +			   AMD64_NUM_COUNTERS + 1),
> +	__EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_SPECIAL_IBS_OP, 0,
> +			   AMD64_NUM_COUNTERS + 1),
> +	EVENT_CONSTRAINT_END
> +};
> +
I think you could define EVENT_IBS_CONSTRAINT() and shorten
the definitions here. You could pass FETCH or OP and the macro
would do the bit shifting. This is how it's done for fixed counters on Intel.
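
Roughly like this (untested, just following the __EVENT_CONSTRAINT()
argument order used above):

#define EVENT_IBS_CONSTRAINT(type)					\
	__EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_SPECIAL_IBS_##type,	\
			   0, AMD64_NUM_COUNTERS + 1)

static struct event_constraint amd_event_constraints[] =
{
	EVENT_IBS_CONSTRAINT(FETCH),
	EVENT_IBS_CONSTRAINT(OP),
	EVENT_CONSTRAINT_END
};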