Re: [PATCH v7 2/4] libperf: Add evsel mmap support

From: Rob Herring
Date: Tue Apr 13 2021 - 15:08:13 EST


On Tue, Apr 13, 2021 at 1:39 PM Arnaldo Carvalho de Melo
<acme@xxxxxxxxxx> wrote:
>
> Em Tue, Apr 13, 2021 at 12:16:04PM -0500, Rob Herring escreveu:
> > In order to support userspace access, an event must be mmapped. While
> > there's already mmap support for evlist, the usecase is a bit different
> > than the self monitoring with userspace access. So let's add a new
> > perf_evsel__mmap() function to mmap an evsel. This allows implementing
> > userspace access as a fastpath for perf_evsel__read().
> >
> > The mmapped address is returned by perf_evsel__mmap_base() which is
> > primarily for users/tests to check if userspace access is enabled.
> >
> > Signed-off-by: Rob Herring <robh@xxxxxxxxxx>
> > ---
> > v7:
> > - Add NULL fd check to perf_evsel__mmap
> > v6:
> > - split mmap struct into its own xyarray
> > v5:
> > - Create an mmap for every underlying event opened. Due to this, we
> > need a different way to get the mmap ptr, so perf_evsel__mmap_base()
> > is introduced.
> > v4:
> > - Change perf_evsel__mmap size to pages instead of bytes
> > v3:
> > - New patch split out from user access patch
> > ---
> > tools/lib/perf/Documentation/libperf.txt | 2 +
> > tools/lib/perf/evsel.c | 54 ++++++++++++++++++++++++
> > tools/lib/perf/include/internal/evsel.h | 1 +
> > tools/lib/perf/include/perf/evsel.h | 2 +
> > tools/lib/perf/libperf.map | 2 +
> > 5 files changed, 61 insertions(+)
> >
> > diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt
> > index 0c74c30ed23a..a2c73df191ca 100644
> > --- a/tools/lib/perf/Documentation/libperf.txt
> > +++ b/tools/lib/perf/Documentation/libperf.txt
> > @@ -136,6 +136,8 @@ SYNOPSIS
> > struct perf_thread_map *threads);
> > void perf_evsel__close(struct perf_evsel *evsel);
> > void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu);
> > + int perf_evsel__mmap(struct perf_evsel *evsel, int pages);
> > + void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread);
> > int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
> > struct perf_counts_values *count);
> > int perf_evsel__enable(struct perf_evsel *evsel);
> > diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c
> > index 4dc06289f4c7..7e140763552f 100644
> > --- a/tools/lib/perf/evsel.c
> > +++ b/tools/lib/perf/evsel.c
> > @@ -11,10 +11,12 @@
> > #include <stdlib.h>
> > #include <internal/xyarray.h>
> > #include <internal/cpumap.h>
> > +#include <internal/mmap.h>
> > #include <internal/threadmap.h>
> > #include <internal/lib.h>
> > #include <linux/string.h>
> > #include <sys/ioctl.h>
> > +#include <sys/mman.h>
> >
> > void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr)
> > {
> > @@ -38,6 +40,7 @@ void perf_evsel__delete(struct perf_evsel *evsel)
> > }
> >
> > #define FD(e, x, y) (*(int *) xyarray__entry(e->fd, x, y))
> > +#define MMAP(e, x, y) (e->mmap ? ((struct perf_mmap *) xyarray__entry(e->mmap, x, y)) : NULL)
> >
> > int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
> > {
> > @@ -55,6 +58,13 @@ int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
> > return evsel->fd != NULL ? 0 : -ENOMEM;
> > }
> >
> > +static int perf_evsel__alloc_mmap(struct perf_evsel *evsel, int ncpus, int nthreads)
> > +{
> > + evsel->mmap = xyarray__new(ncpus, nthreads, sizeof(struct perf_mmap));
> > +
> > + return evsel->mmap != NULL ? 0 : -ENOMEM;
> > +}
> > +
> > static int
> > sys_perf_event_open(struct perf_event_attr *attr,
> > pid_t pid, int cpu, int group_fd,
> > @@ -137,6 +147,8 @@ void perf_evsel__free_fd(struct perf_evsel *evsel)
> > {
> > xyarray__delete(evsel->fd);
> > evsel->fd = NULL;
> > + xyarray__delete(evsel->mmap);
> > + evsel->mmap = NULL;
> > }
> >
> > void perf_evsel__close(struct perf_evsel *evsel)
> > @@ -156,6 +168,48 @@ void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu)
> > perf_evsel__close_fd_cpu(evsel, cpu);
> > }
> >
> > +int perf_evsel__mmap(struct perf_evsel *evsel, int pages)
> > +{
> > + int ret, cpu, thread;
> > + struct perf_mmap_param mp = {
> > + .prot = PROT_READ | PROT_WRITE,
> > + .mask = (pages * page_size) - 1,
> > + };
> > +
> > + if (evsel->fd == NULL)
> > + return -EINVAL;
> > +
> > + if (evsel->mmap == NULL &&
> > + perf_evsel__alloc_mmap(evsel, xyarray__max_x(evsel->fd), xyarray__max_y(evsel->fd)) < 0)
> > + return -ENOMEM;
> > +
> > + for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) {
> > + for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
> > + int fd = FD(evsel, cpu, thread);
> > + struct perf_mmap *map = MMAP(evsel, cpu, thread);
> > +
> > + if (fd < 0)
> > + continue;
> > +
> > + perf_mmap__init(map, NULL, false, NULL);
> > +
> > + ret = perf_mmap__mmap(map, &mp, fd, cpu);
> > + if (ret)
> > + return -1;
> > + }
> > + }
> > +
> > + return 0;
> > +}
>
> Where is the counterpart?

I was assuming the mappings were implicitly unmapped when closing the
fd(s), but it looks like that only happens when the process exits.

> I.e. perf_evsel__munmap(), and it should be
> called if perf_evsel__mmap() fails, right?

If perf_evsel__mmap() fails, the caller shouldn't have to do anything
WRT mmap, right? But if the perf_mmap__mmap() call fails, we do need
some internal clean-up. I'll fix both.

Rob