Re: [PATCH] perf/arm-cmn: Check all the DTCs when reading global counters

From: Robin Murphy
Date: Mon Jan 23 2023 - 13:30:08 EST


On 2023-01-21 02:10, Ilkka Koskinen wrote:
Some events may be available on nodes, none of which belongs to DTC0.
When the driver reads the global counters, it stops as soon as it finds a
DTC that's not being used and, thus, ignores the rest. As the driver
doesn't read the paired global counters, overflowing local counters are
regarded as overflowing global counters (assuming the new local counter
value is smaller than the previous one) and therefore we can see values
around 2^64. Fix the issue by checking all the used DTCs.

The driver is still trying to find a counter that's available on all the
DTCs rather than doing per-DTC allocation of global counters. We may
need to change it in the future, if needed.

Urgh, this is probably the same thing that came to me via Arm support from another customer just recently as well... I'd been waiting to get a bit more diagnostic information back to figure out what to look for, but you've made it clear now, thanks.

If you don't mind, for now I think I'd rather go with a simpler option, which I shall post momentarily...

Cheers,
Robin.

Signed-off-by: Ilkka Koskinen <ilkka@xxxxxxxxxxxxxxxxxxxxxx>
---
drivers/perf/arm-cmn.c | 41 ++++++++++++++++++++++++++++++++---------
1 file changed, 32 insertions(+), 9 deletions(-)

diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index b80a9b74662b..c516f091a002 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -540,7 +540,7 @@ struct arm_cmn_hw_event {
struct arm_cmn_node *dn;
u64 dtm_idx[4];
unsigned int dtc_idx;
- u8 dtcs_used;
+ unsigned long dtcs_used;
u8 num_dns;
u8 dtm_offset;
bool wide_sel;
@@ -550,6 +550,9 @@ struct arm_cmn_hw_event {
#define for_each_hw_dn(hw, dn, i) \
for (i = 0, dn = hw->dn; i < hw->num_dns; i++, dn++)
+#define for_each_used_dtc(hw, cmn, i) \
+ for_each_set_bit(i, &(hw)->dtcs_used, (cmn)->num_dtcs)
+
static struct arm_cmn_hw_event *to_cmn_hw(struct perf_event *event)
{
BUILD_BUG_ON(sizeof(struct arm_cmn_hw_event) > offsetof(struct hw_perf_event, target));
@@ -1272,7 +1275,7 @@ static void arm_cmn_init_counter(struct perf_event *event)
unsigned int i, pmevcnt = CMN_DT_PMEVCNT(hw->dtc_idx);
u64 count;
- for (i = 0; hw->dtcs_used & (1U << i); i++) {
+ for_each_used_dtc(hw, cmn, i) {
writel_relaxed(CMN_COUNTER_INIT, cmn->dtc[i].base + pmevcnt);
cmn->dtc[i].counters[hw->dtc_idx] = event;
}
@@ -1301,7 +1304,7 @@ static void arm_cmn_event_read(struct perf_event *event)
delta = new - prev;
local_irq_save(flags);
- for (i = 0; hw->dtcs_used & (1U << i); i++) {
+ for_each_used_dtc(hw, cmn, i) {
new = arm_cmn_read_counter(cmn->dtc + i, hw->dtc_idx);
delta += new << 16;
}
@@ -1614,10 +1617,32 @@ static void arm_cmn_event_clear(struct arm_cmn *cmn, struct perf_event *event,
}
memset(hw->dtm_idx, 0, sizeof(hw->dtm_idx));
- for (i = 0; hw->dtcs_used & (1U << i); i++)
+ for_each_used_dtc(hw, cmn, i)
cmn->dtc[i].counters[hw->dtc_idx] = NULL;
}
+static int arm_cmn_get_global_counter(struct arm_cmn *cmn,
+ struct arm_cmn_hw_event *hw)
+{
+ int dtc_idx, i;
+ bool available;
+
+ for (dtc_idx = 0; dtc_idx < CMN_DT_NUM_COUNTERS; dtc_idx++) {
+ available = true;
+ for_each_used_dtc(hw, cmn, i) {
+ if (cmn->dtc[i].counters[dtc_idx]) {
+ available = false;
+ break;
+ }
+ }
+
+ if (available)
+ return dtc_idx;
+ }
+
+ return -ENOSPC;
+}
+
static int arm_cmn_event_add(struct perf_event *event, int flags)
{
struct arm_cmn *cmn = to_cmn(event->pmu);
@@ -1642,11 +1667,9 @@ static int arm_cmn_event_add(struct perf_event *event, int flags)
return 0;
}
- /* Grab a free global counter first... */
- dtc_idx = 0;
- while (dtc->counters[dtc_idx])
- if (++dtc_idx == CMN_DT_NUM_COUNTERS)
- return -ENOSPC;
+ dtc_idx = arm_cmn_get_global_counter(cmn, hw);
+ if (dtc_idx < 0)
+ return dtc_idx;
hw->dtc_idx = dtc_idx;