Re: [PATCH 2/2] ARM: perf: Add support for Scorpion PMUs

From: Stephen Boyd
Date: Wed Feb 11 2015 - 18:28:16 EST


On 02/10, Stephen Boyd wrote:
> Scorpion supports a set of local performance monitor event
> selection registers (LPM) sitting behind a cp15 based interface
> that extend the architected PMU events to include Scorpion CPU
> and Venum VFP specific events. To use these events the user is
> expected to program the lpm register with the event code shifted
> into the group they care about and then point the PMNx event at
> that region+group combo by writing a LPMn_GROUPx event. Add
> support for this hardware.
>
> Note: the raw event number is a pure software construct that
> allows us to map the multi-dimensional number space of regions,
> groups, and event codes into a flat event number space suitable
> for use by the perf framework.
>
> This is based on code originally written by Ashwin Chaugule and
> Neil Leeder [1] massaged to become similar to the Krait PMU support
> code.
>
> [1] https://www.codeaurora.org/cgit/quic/la/kernel/msm/tree/arch/arm/kernel/perf_event_msm.c?h=msm-3.4
>
> Cc: Neil Leeder <nleeder@xxxxxxxxxxxxxx>
> Cc: Ashwin Chaugule <ashwinc@xxxxxxxxxxxxxx>
> Cc: <devicetree@xxxxxxxxxxxxxxx>
> Signed-off-by: Stephen Boyd <sboyd@xxxxxxxxxxxxxx>
> ---
> Documentation/devicetree/bindings/arm/pmu.txt | 2 +
> arch/arm/kernel/perf_event_cpu.c | 2 +
> arch/arm/kernel/perf_event_v7.c | 395 ++++++++++++++++++++++++++
> 3 files changed, 399 insertions(+)
>
> diff --git a/Documentation/devicetree/bindings/arm/pmu.txt b/Documentation/devicetree/bindings/arm/pmu.txt
> index 75ef91d08f3b..6e54a9d88b7a 100644
> --- a/Documentation/devicetree/bindings/arm/pmu.txt
> +++ b/Documentation/devicetree/bindings/arm/pmu.txt
> @@ -18,6 +18,8 @@ Required properties:
> "arm,arm11mpcore-pmu"
> "arm,arm1176-pmu"
> "arm,arm1136-pmu"
> + "qcom,scorpion-pmu"
> + "qcom,scorpion-mp-pmu"
> "qcom,krait-pmu"
> - interrupts : 1 combined interrupt or 1 per core. If the interrupt is a per-cpu
> interrupt (PPI) then 1 interrupt should be specified.
> diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
> index dd9acc95ebc0..010ffd241434 100644
> --- a/arch/arm/kernel/perf_event_cpu.c
> +++ b/arch/arm/kernel/perf_event_cpu.c
> @@ -242,6 +242,8 @@ static struct of_device_id cpu_pmu_of_device_ids[] = {
> {.compatible = "arm,arm11mpcore-pmu", .data = armv6mpcore_pmu_init},
> {.compatible = "arm,arm1176-pmu", .data = armv6_1176_pmu_init},
> {.compatible = "arm,arm1136-pmu", .data = armv6_1136_pmu_init},
> + {.compatible = "qcom,scorpion-pmu", .data = scorpion_pmu_init},
> + {.compatible = "qcom,scorpion-mp-pmu", .data = scorpion_pmu_init},
> {.compatible = "qcom,krait-pmu", .data = krait_pmu_init},
> {},
> };
> diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
> index 84a3ec3bc592..14bc8726f554 100644
> --- a/arch/arm/kernel/perf_event_v7.c
> +++ b/arch/arm/kernel/perf_event_v7.c
> @@ -140,6 +140,23 @@ enum krait_perf_types {
> KRAIT_PERFCTR_L1_DTLB_ACCESS = 0x12210,
> };
>
> +/* ARMv7 Scorpion specific event types */
> +enum scorpion_perf_types {
> + SCORPION_LPM0_GROUP0 = 0x4c,
> + SCORPION_LPM1_GROUP0 = 0x50,
> + SCORPION_LPM2_GROUP0 = 0x54,
> + SCORPION_L2LPM_GROUP0 = 0x58,
> + SCORPION_VLPM_GROUP0 = 0x5c,
> +
> + SCORPION_ICACHE_ACCESS = 0x10053,
> + SCORPION_ICACHE_MISS = 0x10052,
> +
> + SCORPION_DTLB_ACCESS = 0x12013,
> + SCORPION_DTLB_MISS = 0x12012,
> +
> + SCORPION_ITLB_MISS = 0x12021,
> +};
> +
> /*
> * Cortex-A8 HW events mapping
> *
> @@ -482,6 +499,51 @@ static const unsigned krait_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
> };
>
> /*
> + * Scorpion HW events mapping
> + */
> +static const unsigned scorpion_perf_map[PERF_COUNT_HW_MAX] = {
> + PERF_MAP_ALL_UNSUPPORTED,
> + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
> + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
> + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
> + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
> + [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES,
> +};
> +
> +static const unsigned scorpion_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
> + [PERF_COUNT_HW_CACHE_OP_MAX]
> + [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
> + PERF_CACHE_MAP_ALL_UNSUPPORTED,
> + /*
> + * The performance counters don't differentiate between read and write
> + * accesses/misses so this isn't strictly correct, but it's the best we
> + * can do. Writes and reads get combined.
> + */
> + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
> + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
> + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
> + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
> + [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = SCORPION_ICACHE_ACCESS,
> + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_ICACHE_MISS,
> + [C(L1I)][C(OP_WRITE)][C(RESULT_ACCESS)] = SCORPION_ICACHE_ACCESS,
> + [C(L1I)][C(OP_WRITE)][C(RESULT_MISS)] = SCORPION_ICACHE_MISS,
> + /*
> + * Only ITLB misses and DTLB refills are supported. If users want the
> + * DTLB read/write miss breakdown, a raw counter must be used.
> + */
> + [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = SCORPION_DTLB_ACCESS,
> + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_DTLB_MISS,
> + [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = SCORPION_DTLB_ACCESS,
> + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = SCORPION_DTLB_MISS,
> + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_ITLB_MISS,
> + [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = SCORPION_ITLB_MISS,
> + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
> + [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
> + [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
> + [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
> +};
> +
> +/*
> * Perf Events' indices
> */
> #define ARMV7_IDX_CYCLE_COUNTER 0
> @@ -976,6 +1038,12 @@ static int krait_map_event_no_branch(struct perf_event *event)
> &krait_perf_cache_map, 0xFFFFF);
> }
>
> +static int scorpion_map_event(struct perf_event *event)
> +{
> + return armpmu_map_event(event, &scorpion_perf_map,
> + &scorpion_perf_cache_map, 0xFFFFF);
> +}
> +
> static void armv7pmu_init(struct arm_pmu *cpu_pmu)
> {
> cpu_pmu->handle_irq = armv7pmu_handle_irq;
> @@ -1463,6 +1531,333 @@ static int krait_pmu_init(struct arm_pmu *cpu_pmu)
> cpu_pmu->clear_event_idx = krait_pmu_clear_event_idx;
> return 0;
> }
> +
> +/*
> + * Scorpion Local Performance Monitor Register (LPMn)
> + *
> + * 31 30 24 16 8 0
> + * +--------------------------------+
> + * LPM0 | EN | CC | CC | CC | CC | N = 1, R = 0
> + * +--------------------------------+
> + * LPM1 | EN | CC | CC | CC | CC | N = 1, R = 1
> + * +--------------------------------+
> + * LPM2 | EN | CC | CC | CC | CC | N = 1, R = 2
> + * +--------------------------------+
> + * L2LPM | EN | CC | CC | CC | CC | N = 1, R = 3
> + * +--------------------------------+
> + * VLPM | EN | CC | CC | CC | CC | N = 2, R = ?
> + * +--------------------------------+
> + * EN | G=3 | G=2 | G=1 | G=0
> + *
> + *
> + * Event Encoding:
> + *
> + * hwc->config_base = 0xNRCCG
> + *
> + * N = prefix, 1 for Scorpion CPU (LPMn/L2LPM), 2 for Venum VFP (VLPM)
> + * R = region register
> + * CC = class of events the group G is choosing from
> + * G = group or particular event
> + *
> + * Example: 0x12021 is a Scorpion CPU event in LPM2's group 1 with code 2
> + *
> + * A region (R) corresponds to a piece of the CPU (execution unit, instruction
> + * unit, etc.) while the event code (CC) corresponds to a particular class of
> + * events (interrupts for example). An event code is broken down into
> + * groups (G) that can be mapped into the PMU (irq, fiqs, and irq+fiqs for
> + * example).
> + */
> +
> +static u32 scorpion_read_pmresrn(int n)
> +{
> + u32 val;
> +
> + switch (n) {
> + case 0:
> + asm volatile("mrc p15, 0, %0, c15, c0, 0" : "=r" (val));
> + break;
> + case 1:
> + asm volatile("mrc p15, 1, %0, c15, c0, 0" : "=r" (val));
> + break;
> + case 2:
> + asm volatile("mrc p15, 2, %0, c15, c0, 0" : "=r" (val));
> + break;
> + case 3:
> + asm volatile("mrc p15, 3, %0, c15, c2, 0" : "=r" (val));
> + break;
> + default:
> + BUG(); /* Should be validated in scorpion_pmu_get_event_idx() */
> + }
> +
> + return val;
> +}
> +
> +static void scorpion_write_pmresrn(int n, u32 val)
> +{
> + switch (n) {
> + case 0:
> + asm volatile("mcr p15, 0, %0, c15, c0, 0" : : "r" (val));
> + break;
> + case 1:
> + asm volatile("mcr p15, 1, %0, c15, c0, 0" : : "r" (val));
> + break;
> + case 2:
> + asm volatile("mcr p15, 2, %0, c15, c0, 0" : : "r" (val));
> + break;
> + case 3:
> + asm volatile("mcr p15, 3, %0, c15, c2, 0" : : "r" (val));
> + break;
> + default:
> + BUG(); /* Should be validated in scorpion_pmu_get_event_idx() */
> + }
> +}
> +
> +static u32 scorpion_get_pmresrn_event(unsigned int region)
> +{
> + static const u32 pmresrn_table[] = { SCORPION_LPM0_GROUP0,
> + SCORPION_LPM1_GROUP0,
> + SCORPION_LPM2_GROUP0,
> + SCORPION_L2LPM_GROUP0 };
> + return pmresrn_table[region];
> +}
> +
> +static void scorpion_evt_setup(int idx, u32 config_base)
> +{
> + u32 val;
> + u32 mask;
> + u32 vval, fval;
> + unsigned int region;
> + unsigned int group;
> + unsigned int code;
> + unsigned int group_shift;
> + bool venum_event;
> +
> + krait_decode_event(config_base, &region, &group, &code, &venum_event,
> + NULL);
> +
> + group_shift = group * 8;
> + mask = 0xff << group_shift;
> +
> + /* Configure evtsel for the region and group */
> + if (venum_event)
> + val = SCORPION_VLPM_GROUP0;
> + else
> + val = scorpion_get_pmresrn_event(region);
> + val += group;
> + /* Mix in mode-exclusion bits */
> + val |= config_base & (ARMV7_EXCLUDE_USER | ARMV7_EXCLUDE_PL1);
> + armv7_pmnc_write_evtsel(idx, val);
> +
> + asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0));
> +
> + if (venum_event) {
> + venum_pre_pmresr(&vval, &fval);
> + val = venum_read_pmresr();
> + val &= ~mask;
> + val |= code << group_shift;
> + val |= PMRESRn_EN;
> + venum_write_pmresr(val);
> + venum_post_pmresr(vval, fval);
> + } else {
> + val = scorpion_read_pmresrn(region);
> + val &= ~mask;
> + val |= code << group_shift;
> + val |= PMRESRn_EN;
> + scorpion_write_pmresrn(region, val);
> + }
> +}
> +
> +static void scorpion_clearpmu(u32 config_base)
> +{
> + u32 val;
> + u32 vval, fval;
> + unsigned int region;
> + unsigned int group;
> + bool venum_event;
> +
> + krait_decode_event(config_base, &region, &group, NULL, &venum_event,
> + NULL);
> +
> + if (venum_event) {
> + venum_pre_pmresr(&vval, &fval);
> + val = venum_read_pmresr();
> + val = clear_pmresrn_group(val, group);
> + venum_write_pmresr(val);
> + venum_post_pmresr(vval, fval);
> + } else {
> + val = scorpion_read_pmresrn(region);
> + val = clear_pmresrn_group(val, group);
> + scorpion_write_pmresrn(region, val);
> + }
> +}
> +
> +static void scorpion_pmu_disable_event(struct perf_event *event)
> +{
> + unsigned long flags;
> + struct hw_perf_event *hwc = &event->hw;
> + int idx = hwc->idx;
> + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
> + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
> +
> + /* Disable counter and interrupt */
> + raw_spin_lock_irqsave(&events->pmu_lock, flags);
> +
> + /* Disable counter */
> + armv7_pmnc_disable_counter(idx);
> +
> + /*
> + * Clear pmresr code (if destined for PMNx counters)
> + */
> + if (hwc->config_base & KRAIT_EVENT_MASK)
> + scorpion_clearpmu(hwc->config_base);
> +
> + /* Disable interrupt for this counter */
> + armv7_pmnc_disable_intens(idx);
> +
> + raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
> +}
> +
> +static void scorpion_pmu_enable_event(struct perf_event *event)
> +{
> + unsigned long flags;
> + struct hw_perf_event *hwc = &event->hw;
> + int idx = hwc->idx;
> + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
> + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
> +
> + /*
> + * Enable counter and interrupt, and set the counter to count
> + * the event that we're interested in.
> + */
> + raw_spin_lock_irqsave(&events->pmu_lock, flags);
> +
> + /* Disable counter */
> + armv7_pmnc_disable_counter(idx);
> +
> + /*
> + * Set event (if destined for PMNx counters)
> + * We don't set the event for the cycle counter because we
> + * don't have the ability to perform event filtering.
> + */
> + if (hwc->config_base & KRAIT_EVENT_MASK)
> + scorpion_evt_setup(idx, hwc->config_base);
> + else if (idx != ARMV7_IDX_CYCLE_COUNTER)
> + armv7_pmnc_write_evtsel(idx, hwc->config_base);
> +
> + /* Enable interrupt for this counter */
> + armv7_pmnc_enable_intens(idx);
> +
> + /* Enable counter */
> + armv7_pmnc_enable_counter(idx);
> +
> + raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
> +}
> +
> +static void scorpion_pmu_reset(void *info)
> +{
> + u32 vval, fval;
> +
> + armv7pmu_reset(info);
> +
> + /* Clear all pmresrs */
> + scorpion_write_pmresrn(0, 0);
> + scorpion_write_pmresrn(1, 0);
> + scorpion_write_pmresrn(2, 0);
> + scorpion_write_pmresrn(3, 0);
> +
> + venum_pre_pmresr(&vval, &fval);
> + venum_write_pmresr(0);
> + venum_post_pmresr(vval, fval);
> +}
> +
> +static int scorpion_event_to_bit(struct perf_event *event, unsigned int region,
> + unsigned int group)
> +{
> + int bit;
> + struct hw_perf_event *hwc = &event->hw;
> + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
> +
> + if (hwc->config_base & VENUM_EVENT)
> + bit = SCORPION_VLPM_GROUP0;
> + else
> + bit = scorpion_get_pmresrn_event(region);
> + bit -= scorpion_get_pmresrn_event(0);
> + bit += group;
> + /*
> + * Lower bits are reserved for use by the counters (see
> + * armv7pmu_get_event_idx() for more info)
> + */
> + bit += ARMV7_IDX_COUNTER_LAST(cpu_pmu) + 1;
> +
> + return bit;
> +}
> +
> +/*
> + * We check for column exclusion constraints here.
> + * Two events can't use the same group within a pmresr register.
> + */
> +static int scorpion_pmu_get_event_idx(struct pmu_hw_events *cpuc,
> + struct perf_event *event)
> +{
> + int idx;
> + int bit = -1;
> + unsigned int region;
> + unsigned int code;
> + unsigned int group;
> + bool venum_event, scorpion_event;
> + struct hw_perf_event *hwc = &event->hw;
> +
> + krait_decode_event(hwc->config_base, &region, &group, &code,
> + &venum_event, &scorpion_event);
> +
> + if (venum_event || scorpion_event) {
> + /* Ignore invalid events */
> + if (group > 3 || region > 3)
> + return -EINVAL;
> +
> + bit = scorpion_event_to_bit(event, region, group);
> + if (test_and_set_bit(bit, cpuc->used_mask))
> + return -EAGAIN;
> + }
> +
> + idx = armv7pmu_get_event_idx(cpuc, event);
> + if (idx < 0 && bit >= 0)
> + clear_bit(bit, cpuc->used_mask);
> +
> + return idx;
> +}
> +
> +static void scorpion_pmu_clear_event_idx(struct pmu_hw_events *cpuc,
> + struct perf_event *event)
> +{
> + int bit;
> + struct hw_perf_event *hwc = &event->hw;
> + unsigned int region;
> + unsigned int group;
> + bool venum_event, scorpion_event;
> +
> + krait_decode_event(hwc->config_base, &region, &group, NULL,
> + &venum_event, &scorpion_event);
> +
> + if (venum_event || scorpion_event) {
> + bit = scorpion_event_to_bit(event, region, group);
> + clear_bit(bit, cpuc->used_mask);
> + }
> +}
> +
> +static int scorpion_pmu_init(struct arm_pmu *cpu_pmu)
> +{
> + armv7pmu_init(cpu_pmu);
> + cpu_pmu->name = "armv7_scorpion";
> + cpu_pmu->map_event = scorpion_map_event;
> + cpu_pmu->num_events = armv7_read_num_pmnc_events();
> + cpu_pmu->reset = scorpion_pmu_reset;
> + cpu_pmu->enable = scorpion_pmu_enable_event;
> + cpu_pmu->disable = scorpion_pmu_disable_event;
> + cpu_pmu->get_event_idx = scorpion_pmu_get_event_idx;
> + cpu_pmu->clear_event_idx = scorpion_pmu_clear_event_idx;
> + return 0;
> +}
> #else
> static inline int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu)

I forgot to add the empty scorpion_pmu_init() when
CONFIG_CPU_V7=n. If there are no other comments by the end of the
week I'll send a v2.

--
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/