[RFC] libperf: Add support for user space counter access

From: Rob Herring
Date: Fri Aug 07 2020 - 19:05:22 EST


x86 and arm64 can both support direct userspace access to event
counters. The access sequence is less than trivial, and it currently
lives in perf test code (tools/perf/arch/x86/tests/rdpmc.c), with copies
carried in projects such as PAPI and libpfm4.

Patches to add arm64 userspace support are pending[1].

For this RFC, I'm looking for either a "yes, this seems like a good
idea" or a "no, go away, we don't want this in libperf".

TODO:
- Handle the case where userspace access is not enabled.
- Handle pmc_width and cap_user_time_short in the read loop (a rough
  sketch covering these first two items follows below).
- Move the existing rdpmc test over to a libperf-based test?
- Abstract out rdtsc/rdpmc (currently this only builds on x86; Arm
  versions still need to be added).

[1] https://lore.kernel.org/r/20200707205333.624938-1-robh@xxxxxxxxxx/
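
For the first two TODO items, the userspace read sequence documented in
the perf_event_open(2) man page (and in the comments in
include/uapi/linux/perf_event.h) already spells out what is missing from
the read loop: check cap_user_rdpmc before trusting the index,
sign-extend the raw counter value by pmc_width, and, when
cap_user_time_short is set, widen the short hardware clock with
time_cycles/time_mask before computing the enabled/running delta. Below
is a rough, untested sketch of what perf_mmap__read_self() might grow
into; the read_self_sketch() name and the locally defined
barrier()/rdpmc()/rdtsc() helpers are only for illustration (they mirror
the mmap.c hunk further down) and are not part of this patch:

#include <linux/perf_event.h>
#include <linux/types.h>

/* Compiler barrier; tools/include/linux/compiler.h provides this in-tree. */
#define barrier() asm volatile("" ::: "memory")

/* x86-only helpers, same as in the mmap.c hunk below. */
static __u64 rdpmc(unsigned int counter)
{
	unsigned int low, high;

	asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));
	return low | ((__u64)high) << 32;
}

static __u64 rdtsc(void)
{
	unsigned int low, high;

	asm volatile("rdtsc" : "=a" (low), "=d" (high));
	return low | ((__u64)high) << 32;
}

static __u64 read_self_sketch(struct perf_event_mmap_page *pc)
{
	__u32 seq, idx, width, time_mult = 0, time_shift = 0;
	__u64 count, cyc = 0, time_offset = 0, time_cycles = 0, time_mask = ~0ULL;
	__u64 enabled, running, delta;

	do {
		seq = pc->lock;
		barrier();

		/* TODO 1: userspace counter access may be disabled. */
		if (!pc->cap_user_rdpmc)
			return ~0ULL;	/* placeholder error value */

		enabled = pc->time_enabled;
		running = pc->time_running;

		if (enabled != running) {
			cyc = rdtsc();
			time_mult = pc->time_mult;
			time_shift = pc->time_shift;
			time_offset = pc->time_offset;
			/* TODO 2b: short, wrapping hardware clocks (e.g. Arm). */
			if (pc->cap_user_time_short) {
				time_cycles = pc->time_cycles;
				time_mask = pc->time_mask;
			}
		}

		idx = pc->index;
		count = pc->offset;
		if (idx) {
			/* TODO 2a: sign-extend counters narrower than 64 bits. */
			__s64 pmc = rdpmc(idx - 1);

			width = pc->pmc_width;
			pmc <<= 64 - width;
			pmc >>= 64 - width;
			count += pmc;
		}

		barrier();
	} while (pc->lock != seq);

	if (enabled != running) {
		__u64 quot, rem;

		if (pc->cap_user_time_short)
			cyc = time_cycles + ((cyc - time_cycles) & time_mask);

		quot = cyc >> time_shift;
		rem = cyc & (((__u64)1 << time_shift) - 1);
		delta = time_offset + quot * time_mult +
			((rem * time_mult) >> time_shift);

		enabled += delta;
		if (idx)
			running += delta;

		/* Scale by enabled/running, exactly as the patch below does. */
		quot = count / running;
		rem = count % running;
		count = quot * enabled + (rem * enabled) / running;
	}

	return count;
}

Whether the disabled-access case should return a sentinel like ~0ULL, an
error code, or fall back to a read() syscall is one of the open
questions for this RFC.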

Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
Cc: Mark Rutland <mark.rutland@xxxxxxx>
Cc: Alexander Shishkin <alexander.shishkin@xxxxxxxxxxxxxxx>
Cc: Jiri Olsa <jolsa@xxxxxxxxxx>
Cc: Namhyung Kim <namhyung@xxxxxxxxxx>
Signed-off-by: Rob Herring <robh@xxxxxxxxxx>
---
.../perf/Documentation/examples/user_read.c | 103 ++++++++++++++++++
tools/lib/perf/include/perf/mmap.h | 2 +
tools/lib/perf/libperf.map | 1 +
tools/lib/perf/mmap.c | 72 ++++++++++++
4 files changed, 178 insertions(+)
create mode 100644 tools/lib/perf/Documentation/examples/user_read.c

diff --git a/tools/lib/perf/Documentation/examples/user_read.c b/tools/lib/perf/Documentation/examples/user_read.c
new file mode 100644
index 000000000000..47d5f1935861
--- /dev/null
+++ b/tools/lib/perf/Documentation/examples/user_read.c
@@ -0,0 +1,103 @@
+#include <linux/perf_event.h>
+#include <perf/evlist.h>
+#include <perf/evsel.h>
+#include <perf/cpumap.h>
+#include <perf/threadmap.h>
+#include <perf/mmap.h>
+#include <perf/core.h>
+#include <perf/event.h>
+#include <stdio.h>
+#include <unistd.h>
+
+static int libperf_print(enum libperf_print_level level,
+			 const char *fmt, va_list ap)
+{
+	return vfprintf(stderr, fmt, ap);
+}
+
+static unsigned long long read_counter(struct perf_evlist *evlist)
+{
+	struct perf_mmap *map;
+	unsigned long long cnt;
+
+	perf_evlist__for_each_mmap(evlist, map, false) {
+		while ((cnt = perf_mmap__read_self(map)) == ~0ULL);
+		return cnt;
+	}
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	int count = 100000, err = -1;
+	unsigned long long start, end;
+	struct perf_evlist *evlist;
+	struct perf_evsel *evsel;
+	struct perf_thread_map *threads = NULL;
+	struct perf_cpu_map *cpus;
+	struct perf_counts_values counts;
+
+	struct perf_event_attr attr1 = {
+		.type = PERF_TYPE_HARDWARE,
+		.config = PERF_COUNT_HW_CPU_CYCLES,
+		.disabled = 1,
+		.exclude_kernel = 1,
+	};
+
+	libperf_init(libperf_print);
+
+	evlist = perf_evlist__new();
+	if (!evlist) {
+		fprintf(stderr, "failed to create evlist\n");
+		goto out_threads;
+	}
+	evsel = perf_evsel__new(&attr1);
+	if (!evsel) {
+		fprintf(stderr, "failed to create evsel1\n");
+		goto out_evlist;
+	}
+	perf_evlist__add(evlist, evsel);
+
+	threads = perf_thread_map__new_dummy();
+	if (!threads) {
+		fprintf(stderr, "failed to create threads\n");
+		return -1;
+	}
+	perf_thread_map__set_pid(threads, 0, 0);
+	cpus = perf_cpu_map__dummy_new();
+	if (!cpus) {
+		fprintf(stderr, "failed to create cpus\n");
+		return -1;
+	}
+	perf_evlist__set_maps(evlist, cpus, threads);
+
+	err = perf_evlist__open(evlist);
+	if (err) {
+		fprintf(stderr, "failed to open evsel\n");
+		goto out_evlist;
+	}
+
+	err = perf_evlist__mmap(evlist, 0);
+	if (err) {
+		fprintf(stderr, "failed to mmap evlist\n");
+		goto out_evlist;
+	}
+
+	perf_evlist__enable(evlist);
+
+	start = read_counter(evlist);
+	while (count--);
+	end = read_counter(evlist);
+
+	perf_evlist__disable(evlist);
+
+	fprintf(stdout, "start %llu, end %llu, delta %llu\n",
+		start, end, end - start);
+
+	perf_evlist__close(evlist);
+out_evlist:
+	perf_evlist__delete(evlist);
+out_threads:
+	perf_thread_map__put(threads);
+	return err;
+}
diff --git a/tools/lib/perf/include/perf/mmap.h b/tools/lib/perf/include/perf/mmap.h
index 9508ad90d8b9..2d0a7e8b13db 100644
--- a/tools/lib/perf/include/perf/mmap.h
+++ b/tools/lib/perf/include/perf/mmap.h
@@ -12,4 +12,6 @@ LIBPERF_API int perf_mmap__read_init(struct perf_mmap *map);
 LIBPERF_API void perf_mmap__read_done(struct perf_mmap *map);
 LIBPERF_API union perf_event *perf_mmap__read_event(struct perf_mmap *map);
 
+LIBPERF_API __u64 perf_mmap__read_self(struct perf_mmap *map);
+
 #endif /* __LIBPERF_MMAP_H */
diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map
index 7be1af8a546c..676a73300add 100644
--- a/tools/lib/perf/libperf.map
+++ b/tools/lib/perf/libperf.map
@@ -46,6 +46,7 @@ LIBPERF_0.0.1 {
 		perf_mmap__read_init;
 		perf_mmap__read_done;
 		perf_mmap__read_event;
+		perf_mmap__read_self;
 	local:
 		*;
 };
diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c
index 79d5ed6c38cc..5a167618f4c5 100644
--- a/tools/lib/perf/mmap.c
+++ b/tools/lib/perf/mmap.c
@@ -273,3 +273,75 @@ union perf_event *perf_mmap__read_event(struct perf_mmap *map)

 	return event;
 }
+
+static u64 rdpmc(unsigned int counter)
+{
+	unsigned int low, high;
+
+	asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));
+
+	return low | ((u64)high) << 32;
+}
+
+static u64 rdtsc(void)
+{
+	unsigned int low, high;
+
+	asm volatile("rdtsc" : "=a" (low), "=d" (high));
+
+	return low | ((u64)high) << 32;
+}
+
+__u64 perf_mmap__read_self(struct perf_mmap *map)
+{
+	struct perf_event_mmap_page *pc = map->base;
+	u32 seq, idx, time_mult = 0, time_shift = 0;
+	u64 count, cyc = 0, time_offset = 0, enabled, running, delta;
+
+	/*
+	 * Check if event was unmapped due to a POLLHUP/POLLERR.
+	 */
+	if (!refcount_read(&map->refcnt))
+		return ~0;
+
+	do {
+		seq = pc->lock;
+		barrier();
+
+		enabled = pc->time_enabled;
+		running = pc->time_running;
+
+		if (enabled != running) {
+			cyc = rdtsc();
+			time_mult = pc->time_mult;
+			time_shift = pc->time_shift;
+			time_offset = pc->time_offset;
+		}
+
+		idx = pc->index;
+		count = pc->offset;
+		if (idx)
+			count += rdpmc(idx - 1);
+
+		barrier();
+	} while (pc->lock != seq);
+
+	if (enabled != running) {
+		u64 quot, rem;
+
+		quot = (cyc >> time_shift);
+		rem = cyc & (((u64)1 << time_shift) - 1);
+		delta = time_offset + quot * time_mult +
+			((rem * time_mult) >> time_shift);
+
+		enabled += delta;
+		if (idx)
+			running += delta;
+
+		quot = count / running;
+		rem = count % running;
+		count = quot * enabled + (rem * enabled) / running;
+	}
+
+	return count;
+}
--
2.25.1