Re: [PATCH V7 08/14] perf/x86/intel: Generic support for hardware TopDown metrics

From: Liang, Kan
Date: Fri Jul 24 2020 - 15:10:58 EST




On 7/24/2020 12:07 PM, Liang, Kan wrote:


On 7/24/2020 11:27 AM, peterz@xxxxxxxxxxxxx wrote:
On Fri, Jul 24, 2020 at 03:19:06PM +0200, peterz@xxxxxxxxxxxxx wrote:
On Thu, Jul 23, 2020 at 10:11:11AM -0700, kan.liang@xxxxxxxxxxxxxxx wrote:
@@ -3375,6 +3428,72 @@ static int intel_pmu_hw_config(struct perf_event *event)
      if (event->attr.type != PERF_TYPE_RAW)
          return 0;
+    /*
+     * Config Topdown slots and metric events
+     *
+     * The slots event on Fixed Counter 3 can support sampling,
+     * which will be handled normally in x86_perf_event_update().
+     *
+     * The metric events don't support sampling.
+     *
+     * For counting, topdown slots and metric events will be
+     * handled specially for event update.
+     * A flag PERF_X86_EVENT_TOPDOWN is applied for the case.
+     */
+    if (x86_pmu.intel_cap.perf_metrics && is_topdown_event(event)) {
+        if (is_metric_event(event)) {
+            struct perf_event *leader = event->group_leader;
+            struct perf_event *sibling;
+
+            /* The metric events don't support sampling. */
+            if (is_sampling_event(event))
+                return -EINVAL;
+
+            /* The metric events cannot be a group leader. */
+            if (leader == event)
+                return -EINVAL;
+
+            /*
+             * The slots event cannot be the leader of a topdown
+             * sample-read group, e.g., {slots, topdown-retiring}:S
+             */
+            if (is_slots_event(leader) && is_sampling_event(leader))
+                return -EINVAL;

This has nothing to do with sample-read; SLOTS cannot be sampling when
coupled with the METRIC stuff because hardware is daft.

And you can have SAMPLE_READ on non-leader events just fine.

+
+            /*
+             * The slots event must be before the metric events,
+             * because we only update the values of a topdown
+             * group once with the slots event.
+             */
+            if (!is_slots_event(leader)) {
+                for_each_sibling_event(sibling, leader) {
+                    if (is_slots_event(sibling))
+                        break;
+                    if (is_metric_event(sibling))
+                        return -EINVAL;
+                }
+            }

Per the SIBLING patch this then wants to be:

            if (!is_slots_event(leader))
                return -EINVAL;

            event->event_caps |= PERF_EV_CAP_SIBLING;
            /*
             * Only once we have a METRICs sibling do we
             * need TopDown magic.
             */
            leader->hw.flags |= PERF_X86_EVENT_TOPDOWN;

Since we only set the flag for the SLOTS event now, the V7 patch will treat the metric events as normal events, which triggers an error.

To fix the error, I think we may merge the changes as below with the [PATCH 08/14] perf/x86/intel: Generic support for hardware TopDown metrics.

I think we don't need the PERF_X86_EVENT_TOPDOWN flag anymore.
If it's a non-sampling slots event, apply the special function.
If it's a metric event, do nothing.

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 0f3d01562ded..02dfee0b6615 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -73,10 +73,10 @@ u64 x86_perf_event_update(struct perf_event *event)
u64 prev_raw_count, new_raw_count;
u64 delta;

- if (unlikely(!hwc->event_base))
+ if (unlikely(!hwc->event_base || is_metric_event(event)))
return 0;

- if (unlikely(is_topdown_count(event)) && x86_pmu.update_topdown_event)
+ if (unlikely(is_slots_count(event)) && x86_pmu.update_topdown_event)
return x86_pmu.update_topdown_event(event);

/*
@@ -1280,11 +1280,10 @@ int x86_perf_event_set_period(struct perf_event *event)
s64 period = hwc->sample_period;
int ret = 0, idx = hwc->idx;

- if (unlikely(!hwc->event_base))
+ if (unlikely(!hwc->event_base || is_metric_event(event)))
return 0;

- if (unlikely(is_topdown_count(event)) &&
- x86_pmu.set_topdown_event_period)
+ if (unlikely(is_slots_count(event)) && x86_pmu.set_topdown_event_period)
return x86_pmu.set_topdown_event_period(event);

/*
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 6cb079e0c9d9..010ac74afc09 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2405,27 +2405,18 @@ static u64 icl_update_topdown_event(struct perf_event *event)
return slots;
}

-static void intel_pmu_read_topdown_event(struct perf_event *event)
+static void intel_pmu_read_event(struct perf_event *event)
{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
- /* Only need to call update_topdown_event() once for group read. */
- if ((cpuc->txn_flags & PERF_PMU_TXN_READ) &&
- !is_slots_event(event))
+ if (unlikely(is_metric_event(event)))
return;

- perf_pmu_disable(event->pmu);
- x86_pmu.update_topdown_event(event);
- perf_pmu_enable(event->pmu);
-}
-
-static void intel_pmu_read_event(struct perf_event *event)
-{
if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
intel_pmu_auto_reload_read(event);
- else if (is_topdown_count(event) && x86_pmu.update_topdown_event)
- intel_pmu_read_topdown_event(event);
- else
+ else if (is_slots_count(event) && x86_pmu.update_topdown_event) {
+ perf_pmu_disable(event->pmu);
+ x86_pmu.update_topdown_event(event);
+ perf_pmu_enable(event->pmu);
+ } else
x86_perf_event_update(event);
}

@@ -3606,12 +3597,14 @@ static int intel_pmu_hw_config(struct perf_event *event)
*
* The slots event on Fixed Counter 3 can support sampling,
* which will be handled normally in x86_perf_event_update().
- *
* The metric events don't support sampling.
*
- * For counting, topdown slots and metric events will be
- * handled specially for event update.
- * A flag PERF_X86_EVENT_TOPDOWN is applied for the case.
+ * For the counting of the slots and metric events, the rules
+ * as below have to be applied:
+ * - A standalone metric event or pure metric events group is
+ * not supported.
+ * - The metric events group must include the slots event.
+ * - The slots event must be the leader of the group.
*/
if (x86_pmu.intel_cap.perf_metrics && is_topdown_event(event)) {
if (event->attr.config1 || event->attr.config2)
@@ -3647,11 +3640,6 @@ static int intel_pmu_hw_config(struct perf_event *event)
return -EINVAL;

event->event_caps |= PERF_EV_CAP_SIBLING;
- /*
- * Only once we have a METRICs sibling do we
- * need TopDown magic.
- */
- leader->hw.flags |= PERF_X86_EVENT_TOPDOWN;
}
}

diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 345442410a4d..153db209c105 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -79,12 +79,6 @@ static inline bool constraint_match(struct event_constraint *c, u64 ecode)
#define PERF_X86_EVENT_PEBS_VIA_PT 0x0800 /* use PT buffer for PEBS */
#define PERF_X86_EVENT_PAIR 0x1000 /* Large Increment per Cycle */
#define PERF_X86_EVENT_LBR_SELECT 0x2000 /* Save/Restore MSR_LBR_SELECT */
-#define PERF_X86_EVENT_TOPDOWN 0x4000 /* Count Topdown slots/metrics events */
-
-static inline bool is_topdown_count(struct perf_event *event)
-{
- return event->hw.flags & PERF_X86_EVENT_TOPDOWN;
-}

static inline bool is_metric_event(struct perf_event *event)
{
@@ -105,6 +99,14 @@ static inline bool is_topdown_event(struct perf_event *event)
return is_metric_event(event) || is_slots_event(event);
}

+static inline bool is_slots_count(struct perf_event *event)
+{
+ if (is_slots_event(event) && !is_sampling_event(event))
+ return true;
+
+ return false;
+}
+
struct amd_nb {
int nb_id; /* NorthBridge id */
int refcnt; /* reference count */


Thanks,
Kan

+        }
+
+        if (!is_sampling_event(event)) {
+            if (event->attr.config1 != 0)
+                return -EINVAL;

How does this depend on sampling?

+            /*
+             * The TopDown metrics events and slots event don't
+             * support any filters.
+             */
+            if (event->attr.config & X86_ALL_EVENT_FLAGS)
+                return -EINVAL;

That seems independent of sampling too. Even a sampling SLOTS shouldn't
be having any of those afaict.

+
+            event->hw.flags |= PERF_X86_EVENT_TOPDOWN;

This is confusing too, a !sampling SLOTS event without METRIC siblings
shouldn't have this set, right? So arguably, this should be like above.

+
+            event->event_caps |= PERF_EV_CAP_COEXIST;
+
+            if (is_metric_event(event))
+                event->hw.flags &= ~PERF_X86_EVENT_RDPMC_ALLOWED;

This too seems like something that should be in the is_metric_event()
branch above.

+        }
+    }
+
      if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY))
          return 0;

FWIW, I pushed out a branch with all these changes in:

  git://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git perf/metric

Just to get it some build love, if you want it differently, I'm happy to
throw it all out again.

Thanks Peter.

I will pull the branch and do more tests.

Thanks,
Kan