RE: [PATCH 2/2] perf/x86/intel/uncore: support IIO freerunning counter for SKX

From: Liang, Kan
Date: Mon Oct 16 2017 - 21:20:25 EST



Ping.
Any comments on this patch?

Thanks,
Kan
>
> From: Kan Liang <Kan.liang@xxxxxxxxx>
>
> As of Skylake Server, there are a number of free-running counters in
> each IIO box that collect counts of per-box IO clocks and per-port
> Input/Output bandwidth and utilization.
>
> The event code of the free-running events is shared with the fixed
> event, which is 0xff.
> The umask of the free-running events starts from 0x10; umask values
> below 0x10 are reserved for the fixed event.
>
> The free-running counters can have different MSR locations and offsets.
> Accordingly, they are divided into different types, and each type is
> limited to at most 16 events.
> So the umask of the first free-running event type starts from 0x10, the
> umask of the second type starts from 0x20, and so on.
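
To make the encoding concrete, here is a small standalone sketch (user-space
C, illustration only) that decodes a config value with the same shifts as the
new UNCORE_FREE_RUNNING_MSR_IDX()/_TYPE_IDX() macros and then computes the
counter MSR the way uncore_free_running_msr() does, using the SKX IIO table
from this patch. The helper names and the box index are made up for the
example; box_idx stands in for pmu->pmu_idx.

#include <stdio.h>

#define FREE_RUNNING_UMASK_START	0x10

struct free_running_msr {
	unsigned msr_base;
	unsigned msr_off;
	unsigned num_counters;
	unsigned bits;
};

/* Same numbers as skx_iio_free_running_msr[] below: IOCLK, BW, UTIL. */
static const struct free_running_msr skx_iio_free_running[] = {
	{ 0xa45, 0x20, 1, 36 },
	{ 0xb00, 0x10, 8, 36 },
	{ 0xb08, 0x10, 8, 36 },
};

int main(void)
{
	unsigned long long config = 0xff | (0x22ULL << 8);	/* bw_in_port2 */
	unsigned box_idx = 3;					/* e.g. the fourth IIO box */

	unsigned umask = (config >> 8) & 0xff;
	unsigned type = ((umask - FREE_RUNNING_UMASK_START) >> 4) & 0xf;
	unsigned idx = umask & 0xf;

	/* msr_base + idx + msr_off * pmu_idx, as in uncore_free_running_msr() */
	unsigned msr = skx_iio_free_running[type].msr_base + idx +
		       skx_iio_free_running[type].msr_off * box_idx;

	printf("umask 0x%x -> type %u, idx %u, MSR 0x%x\n", umask, type, idx, msr);
	return 0;
}

So bw_in_port2 (event=0xff,umask=0x22) on the fourth IIO box resolves to
type 1 (BW), counter index 2, MSR 0xb32.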
>
> Free-running counters cannot be written by SW. Counting is suspended
> only when the IIO box is powered down. They are specially handled in
> uncore_pmu_event_add/del/start/stop and are not added to the
> box->event_list.
>
> The bit width of the free-running counters is 36 bits.
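
Since the counters are read-only and 36 bits wide, the generic
uncore_perf_event_update() path just needs the right shift (64 - 36 = 28) to
handle wraparound. A minimal user-space sketch of that delta computation,
with made-up counter values:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	int shift = 64 - 36;			/* 64 - uncore_free_running_bits() */
	uint64_t prev_count = 0xFFFFFFFF0ULL;	/* near the top of the 36-bit range */
	uint64_t new_count = 0x000000010ULL;	/* the counter has wrapped around */
	uint64_t delta;

	/*
	 * Shifting both samples left by (64 - bits) aligns counter bit 35
	 * with bit 63, so the 64-bit subtraction wraps exactly like the
	 * hardware counter; shifting back yields the true increment.
	 */
	delta = (new_count << shift) - (prev_count << shift);
	delta >>= shift;

	printf("delta = %llu\n", (unsigned long long)delta);	/* prints 32 */
	return 0;
}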
>
> Signed-off-by: Kan Liang <Kan.liang@xxxxxxxxx>
> ---
> arch/x86/events/intel/uncore.c       | 33 +++++++++++++++++-
> arch/x86/events/intel/uncore.h       | 67 +++++++++++++++++++++++++++++++++++-
> arch/x86/events/intel/uncore_snbep.c | 58 +++++++++++++++++++++++++++++++
> 3 files changed, 156 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
> index 1c5390f..8d3e46c 100644
> --- a/arch/x86/events/intel/uncore.c
> +++ b/arch/x86/events/intel/uncore.c
> @@ -218,7 +218,9 @@ void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *e
> u64 prev_count, new_count, delta;
> int shift;
>
> - if (event->hw.idx >= UNCORE_PMC_IDX_FIXED)
> + if (event->hw.idx >= UNCORE_PMC_IDX_FREERUNNING)
> + shift = 64 - uncore_free_running_bits(box, event);
> + else if (event->hw.idx == UNCORE_PMC_IDX_FIXED)
> shift = 64 - uncore_fixed_ctr_bits(box);
> else
> shift = 64 - uncore_perf_ctr_bits(box);
> @@ -362,6 +364,9 @@ uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader,
> if (n >= max_count)
> return -EINVAL;
>
> + if (event->hw.idx == UNCORE_PMC_IDX_FREERUNNING)
> + continue;
> +
> box->event_list[n] = event;
> n++;
> }
> @@ -454,6 +459,12 @@ static void uncore_pmu_event_start(struct perf_event *event, int flags)
> struct intel_uncore_box *box = uncore_event_to_box(event);
> int idx = event->hw.idx;
>
> + if (event->hw.idx == UNCORE_PMC_IDX_FREERUNNING) {
> + local64_set(&event->hw.prev_count,
> + uncore_read_counter(box, event));
> + return;
> + }
> +
> if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
> return;
>
> @@ -479,6 +490,11 @@ static void uncore_pmu_event_stop(struct perf_event *event, int flags)
> struct intel_uncore_box *box = uncore_event_to_box(event);
> struct hw_perf_event *hwc = &event->hw;
>
> + if (hwc->idx == UNCORE_PMC_IDX_FREERUNNING) {
> + uncore_perf_event_update(box, event);
> + return;
> + }
> +
> if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
> uncore_disable_event(box, event);
> box->n_active--;
> @@ -512,6 +528,13 @@ static int uncore_pmu_event_add(struct perf_event *event, int flags)
> if (!box)
> return -ENODEV;
>
> + if (hwc->idx == UNCORE_PMC_IDX_FREERUNNING) {
> + event->hw.event_base = uncore_free_running_msr(box, event);
> + if (flags & PERF_EF_START)
> + uncore_pmu_event_start(event, 0);
> + return 0;
> + }
> +
> ret = n = uncore_collect_events(box, event, false);
> if (ret < 0)
> return ret;
> @@ -570,6 +593,9 @@ static void uncore_pmu_event_del(struct perf_event *event, int flags)
>
> uncore_pmu_event_stop(event, PERF_EF_UPDATE);
>
> + if (event->hw.idx == UNCORE_PMC_IDX_FREERUNNING)
> + return;
> +
> for (i = 0; i < box->n_events; i++) {
> if (event == box->event_list[i]) {
> uncore_put_event_constraint(box, event);
> @@ -690,6 +716,11 @@ static int uncore_pmu_event_init(struct perf_event *event)
>
> /* fixed counters have event field hardcoded to zero */
> hwc->config = 0ULL;
> + } else if (is_free_running_event(event)) {
> + if (UNCORE_FREE_RUNNING_MSR_IDX(event->attr.config) >
> + uncore_num_free_running(box, event))
> + return -EINVAL;
> + event->hw.idx = UNCORE_PMC_IDX_FREERUNNING;
> } else {
> hwc->config = event->attr.config &
> (pmu->type->event_mask | ((u64)pmu->type->event_mask_ext << 32));
> diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
> index df5989f..3e60686 100644
> --- a/arch/x86/events/intel/uncore.h
> +++ b/arch/x86/events/intel/uncore.h
> @@ -12,7 +12,25 @@
> #define UNCORE_FIXED_EVENT 0xff
> #define UNCORE_PMC_IDX_MAX_GENERIC 8
> #define UNCORE_PMC_IDX_FIXED UNCORE_PMC_IDX_MAX_GENERIC
> -#define UNCORE_PMC_IDX_MAX (UNCORE_PMC_IDX_FIXED + 1)
> +#define UNCORE_PMC_IDX_FREERUNNING (UNCORE_PMC_IDX_FIXED + 1)
> +#define UNCORE_PMC_IDX_MAX (UNCORE_PMC_IDX_FREERUNNING + 1)
> +
> +/*
> + * Free-running MSR events have the same event code 0xff as fixed events.
> + * The umask of free-running events starts from 0x10.
> + * Umask values below 0x10 are reserved for fixed events.
> + *
> + * The free-running events are divided into different types according to
> + * MSR location, bit width or definition. Each type is limited to at most
> + * 16 events.
> + * So the umask of the first type starts from 0x10, the second from 0x20,
> + * and so on.
> + */
> +#define UNCORE_FREE_RUNNING_MSR_START 0x10
> +#define UNCORE_FREE_RUNNING_MSR_IDX(config) ((config >> 8) & 0xf)
> +#define UNCORE_FREE_RUNNING_MSR_TYPE_IDX(config) \
> + ((((config >> 8) - UNCORE_FREE_RUNNING_MSR_START) >> 4) & 0xf)
> +
>
> #define UNCORE_PCI_DEV_FULL_DATA(dev, func, type, idx) \
> ((dev << 24) | (func << 16) | (type << 8) | idx)
> @@ -34,6 +52,7 @@ struct intel_uncore_ops;
> struct intel_uncore_pmu;
> struct intel_uncore_box;
> struct uncore_event_desc;
> +struct free_running_msr;
>
> struct intel_uncore_type {
> const char *name;
> @@ -41,6 +60,7 @@ struct intel_uncore_type {
> int num_boxes;
> int perf_ctr_bits;
> int fixed_ctr_bits;
> + int num_free_running_type;
> unsigned perf_ctr;
> unsigned event_ctl;
> unsigned event_mask;
> @@ -58,6 +78,7 @@ struct intel_uncore_type {
> struct intel_uncore_pmu *pmus;
> struct intel_uncore_ops *ops;
> struct uncore_event_desc *event_descs;
> + struct free_running_msr *free_running;
> const struct attribute_group *attr_groups[4];
> struct pmu *pmu; /* for custom pmu ops */
> };
> @@ -128,6 +149,13 @@ struct uncore_event_desc {
> const char *config;
> };
>
> +struct free_running_msr {
> + unsigned msr_base;
> + unsigned msr_off;
> + unsigned num_counters;
> + unsigned bits;
> +};
> +
> struct pci2phy_map {
> struct list_head list;
> int segment;
> @@ -214,6 +242,18 @@ static inline unsigned uncore_msr_fixed_ctr(struct intel_uncore_box *box)
> }
>
> static inline
> +unsigned uncore_free_running_msr(struct intel_uncore_box *box,
> + struct perf_event *event)
> +{
> + unsigned type = UNCORE_FREE_RUNNING_MSR_TYPE_IDX(event->attr.config);
> + unsigned idx = UNCORE_FREE_RUNNING_MSR_IDX(event->attr.config);
> + struct intel_uncore_pmu *pmu = box->pmu;
> +
> + return pmu->type->free_running[type].msr_base + idx +
> + pmu->type->free_running[type].msr_off * pmu->pmu_idx;
> +}
> +
> +static inline
> unsigned uncore_msr_event_ctl(struct intel_uncore_box *box, int idx)
> {
> return box->pmu->type->event_ctl +
> @@ -275,11 +315,36 @@ static inline int uncore_fixed_ctr_bits(struct intel_uncore_box *box)
> return box->pmu->type->fixed_ctr_bits;
> }
>
> +static inline unsigned
> +uncore_free_running_bits(struct intel_uncore_box *box,
> + struct perf_event *event)
> +{
> + unsigned idx = UNCORE_FREE_RUNNING_MSR_TYPE_IDX(event->attr.config);
> +
> + return box->pmu->type->free_running[idx].bits;
> +}
> +
> +static inline int uncore_num_free_running(struct intel_uncore_box *box,
> + struct perf_event *event)
> +{
> + unsigned idx = UNCORE_FREE_RUNNING_MSR_TYPE_IDX(event->attr.config);
> +
> + return box->pmu->type->free_running[idx].num_counters;
> +}
> +
> static inline int uncore_num_counters(struct intel_uncore_box *box)
> {
> return box->pmu->type->num_counters;
> }
>
> +static inline bool is_free_running_event(struct perf_event *event)
> +{
> + u64 cfg = event->attr.config;
> +
> + return (((cfg & UNCORE_FIXED_EVENT) == UNCORE_FIXED_EVENT) &&
> + (((cfg >> 8) & 0xff) >= UNCORE_FREE_RUNNING_MSR_START));
> +}
> +
> static inline void uncore_disable_box(struct intel_uncore_box *box)
> {
> if (box->pmu->type->ops->disable_box)
> diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
> index a719681..cbd6061 100644
> --- a/arch/x86/events/intel/uncore_snbep.c
> +++ b/arch/x86/events/intel/uncore_snbep.c
> @@ -3459,6 +3459,61 @@ static struct intel_uncore_ops skx_uncore_iio_ops = {
> .read_counter = uncore_msr_read_counter,
> };
>
> +enum perf_uncore_iio_free_running_msr_type_id {
> + SKX_IIO_MSR_IOCLK = 0,
> + SKX_IIO_MSR_BW = 1,
> + SKX_IIO_MSR_UTIL = 2,
> +
> + SKX_IIO_FREE_RUNNING_MSR_TYPE_MAX,
> +};
> +
> +
> +static struct free_running_msr skx_iio_free_running_msr[] = {
> + [SKX_IIO_MSR_IOCLK] = { 0xa45, 0x20, 1, 36 },
> + [SKX_IIO_MSR_BW] = { 0xb00, 0x10, 8, 36 },
> + [SKX_IIO_MSR_UTIL] = { 0xb08, 0x10, 8, 36 },
> +};
> +
> +static struct uncore_event_desc skx_uncore_iio_events[] = {
> + /* Free-Running IO CLOCKS Counter */
> + INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"),
> + /* Free-Running IIO Bandwidth Counters */
> + INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"),
> + INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"),
> + INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"),
> + INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"),
> + INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"),
> + INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"),
> + INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"),
> + INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"),
> + INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"),
> + INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"),
> + INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"),
> + INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"),
> + INTEL_UNCORE_EVENT_DESC(bw_out_port0, "event=0xff,umask=0x24"),
> + INTEL_UNCORE_EVENT_DESC(bw_out_port0.scale, "3.814697266e-6"),
> + INTEL_UNCORE_EVENT_DESC(bw_out_port0.unit, "MiB"),
> + INTEL_UNCORE_EVENT_DESC(bw_out_port1, "event=0xff,umask=0x25"),
> + INTEL_UNCORE_EVENT_DESC(bw_out_port1.scale, "3.814697266e-6"),
> + INTEL_UNCORE_EVENT_DESC(bw_out_port1.unit, "MiB"),
> + INTEL_UNCORE_EVENT_DESC(bw_out_port2, "event=0xff,umask=0x26"),
> + INTEL_UNCORE_EVENT_DESC(bw_out_port2.scale, "3.814697266e-6"),
> + INTEL_UNCORE_EVENT_DESC(bw_out_port2.unit, "MiB"),
> + INTEL_UNCORE_EVENT_DESC(bw_out_port3, "event=0xff,umask=0x27"),
> + INTEL_UNCORE_EVENT_DESC(bw_out_port3.scale, "3.814697266e-6"),
> + INTEL_UNCORE_EVENT_DESC(bw_out_port3.unit, "MiB"),
> + /* Free-running IIO Utilization Counters */
> + INTEL_UNCORE_EVENT_DESC(util_in_port0, "event=0xff,umask=0x30"),
> + INTEL_UNCORE_EVENT_DESC(util_out_port0, "event=0xff,umask=0x31"),
> + INTEL_UNCORE_EVENT_DESC(util_in_port1, "event=0xff,umask=0x32"),
> + INTEL_UNCORE_EVENT_DESC(util_out_port1, "event=0xff,umask=0x33"),
> + INTEL_UNCORE_EVENT_DESC(util_in_port2, "event=0xff,umask=0x34"),
> + INTEL_UNCORE_EVENT_DESC(util_out_port2, "event=0xff,umask=0x35"),
> + INTEL_UNCORE_EVENT_DESC(util_in_port3, "event=0xff,umask=0x36"),
> + INTEL_UNCORE_EVENT_DESC(util_out_port3, "event=0xff,umask=0x37"),
> + { /* end: all zeroes */ },
> +};
> +
> static struct intel_uncore_type skx_uncore_iio = {
> .name = "iio",
> .num_counters = 4,
> @@ -3470,8 +3525,11 @@ static struct intel_uncore_type skx_uncore_iio = {
> .event_mask_ext = SKX_IIO_PMON_RAW_EVENT_MASK_EXT,
> .box_ctl = SKX_IIO0_MSR_PMON_BOX_CTL,
> .msr_offset = SKX_IIO_MSR_OFFSET,
> + .num_free_running_type = SKX_IIO_FREE_RUNNING_MSR_TYPE_MAX,
> + .free_running = skx_iio_free_running_msr,
> .constraints = skx_uncore_iio_constraints,
> .ops = &skx_uncore_iio_ops,
> + .event_descs = skx_uncore_iio_events,
> .format_group = &skx_uncore_iio_format_group,
> };
>
> --
> 2.7.4