Re: [PATCH 1/1] perf trace: Add BPF augmenter to perf_event_open()'s 'struct perf_event_attr' arg

From: Ian Rogers
Date: Fri Nov 04 2022 - 16:56:28 EST


On Fri, Nov 4, 2022 at 1:30 PM Arnaldo Carvalho de Melo <acme@xxxxxxxxxx> wrote:
>
> Please test it together with what is in my tmp.perf/core branch, as it
> has the fixes for perf to build and run with libbpf 1.0, please install
> libbpf-devel from your distro, at least for now.
>
> There is polishing to do here, but it's interesting as an example of how
> to augment a syscall.
>
> If you run 'perf trace -v' with
> ~acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.c you'll
> see that it reuses augmenters for other syscalls that have similar
> signatures, so, in the future, doing for one struct gets it reused in
> all the other syscalls using such struct.
>
> Also more annotation is needed, as with other syscalls already in 'perf
> trace', to associate a type with syscall integer arguments, for things like
> flags, enumerations not using enum, etc.
>
> - Arnaldo
>

Looks good.

Before:
$ sudo perf trace -e ./augmented_raw_syscalls.o,perf_event_open
--max-events 10 perf stat --quiet sleep 0.001
0.000 ( 0.011 ms): perf/366155 perf_event_open(attr_uptr:
0x56344c9ab1e0, pid: 366156 (perf), cpu: -1, group_fd: -1, flags:
FD_CLOEXEC) = 3
0.014 ( 0.261 ms): perf/366155 perf_event_open(attr_uptr:
0x56344c9ae6b0, pid: 366156 (perf), cpu: -1, group_fd: -1, flags:
FD_CLOEXEC) = 4
0.276 ( 0.192 ms): perf/366155 perf_event_open(attr_uptr:
0x56344c9ae920, pid: 366156 (perf), cpu: -1, group_fd: -1, flags:
FD_CLOEXEC) = 5
0.470 ( 0.012 ms): perf/366155 perf_event_open(attr_uptr:
0x56344c9aeb90, pid: 366156 (perf), cpu: -1, group_fd: -1, flags:
FD_CLOEXEC) = 7
0.485 ( 0.005 ms): perf/366155 perf_event_open(attr_uptr:
0x56344c9acdc0, pid: 366156 (perf), cpu: -1, group_fd: -1, flags:
FD_CLOEXEC) = 8
0.493 ( 0.003 ms): perf/366155 perf_event_open(attr_uptr:
0x56344c9aaf60, pid: 366156 (perf), cpu: -1, group_fd: -1, flags:
FD_CLOEXEC) = 9
0.498 ( 0.003 ms): perf/366155 perf_event_open(attr_uptr:
0x56344c9ad030, pid: 366156 (perf), cpu: -1, group_fd: -1, flags:
FD_CLOEXEC) = 10
0.502 ( 0.002 ms): perf/366155 perf_event_open(attr_uptr:
0x56344c9ad2a0, pid: 366156 (perf), cpu: -1, group_fd: -1, flags:
FD_CLOEXEC) = 11
0.506 ( 0.002 ms): perf/366155 perf_event_open(attr_uptr:
0x56344c9a3120, pid: 366156 (perf), cpu: -1, group_fd: -1, flags:
FD_CLOEXEC) = 12
0.510 ( 0.003 ms): perf/366155 perf_event_open(attr_uptr:
0x56344c9a3410, pid: 366156 (perf), cpu: -1, group_fd: 12, flags:
FD_CLOEXEC) = 13

After:
$ sudo /tmp/perf/perf trace -e
./augmented_raw_syscalls.o,perf_event_open --max-events 10 perf stat
--quiet sleep 0.001
0.000 ( 0.009 ms): perf/366177 perf_event_open(attr_uptr: { type:
1, size: 128, config: 0x1, sample_type: IDENTIFIER, read_format:
TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1,
enable_on_exec: 1, exclude_guest: 1 }, pid: 366178 (perf), cpu: -1,
group_fd: -1, flags: FD_CLOEXEC) = 3
0.013 ( 0.093 ms): perf/366177 perf_event_open(attr_uptr: { type:
1, size: 128, config: 0x3, sample_type: IDENTIFIER, read_format:
TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1,
enable_on_exec: 1, exclude_guest: 1 }, pid: 366178 (perf), cpu: -1,
group_fd: -1, flags: FD_CLOEXEC) = 4
0.107 ( 0.026 ms): perf/366177 perf_event_open(attr_uptr: { type:
1, size: 128, config: 0x4, sample_type: IDENTIFIER, read_format:
TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1,
enable_on_exec: 1, exclude_guest: 1 }, pid: 366178 (perf), cpu: -1,
group_fd: -1, flags: FD_CLOEXEC) = 5
0.135 ( 0.026 ms): perf/366177 perf_event_open(attr_uptr: { type:
1, size: 128, config: 0x2, sample_type: IDENTIFIER, read_format:
TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1,
enable_on_exec: 1, exclude_guest: 1 }, pid: 366178 (perf), cpu: -1,
group_fd: -1, flags: FD_CLOEXEC) = 7
0.162 ( 0.006 ms): perf/366177 perf_event_open(attr_uptr: { size:
128, sample_type: IDENTIFIER, read_format:
TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1,
enable_on_exec: 1, exclude_guest: 1 }, pid: 366178 (perf), cpu: -1,
group_fd: -1, flags: FD_CLOEXEC) = 8
0.169 ( 0.002 ms): perf/366177 perf_event_open(attr_uptr: { size:
128, config: 0x1, sample_type: IDENTIFIER, read_format:
TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1,
enable_on_exec: 1, exclude_guest: 1 }, pid: 366178 (perf), cpu: -1,
group_fd: -1, flags: FD_CLOEXEC) = 9
0.172 ( 0.003 ms): perf/366177 perf_event_open(attr_uptr: { size:
128, config: 0x4, sample_type: IDENTIFIER, read_format:
TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1,
enable_on_exec: 1, exclude_guest: 1 }, pid: 366178 (perf), cpu: -1,
group_fd: -1, flags: FD_CLOEXEC) = 10
0.176 ( 0.002 ms): perf/366177 perf_event_open(attr_uptr: { size:
128, config: 0x5, sample_type: IDENTIFIER, read_format:
TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1,
enable_on_exec: 1, exclude_guest: 1 }, pid: 366178 (perf), cpu: -1,
group_fd: -1, flags: FD_CLOEXEC) = 11
0.179 ( 0.002 ms): perf/366177 perf_event_open(attr_uptr: { type:
4, size: 128, config: 0x400, sample_type: IDENTIFIER, read_format:
TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING|ID|GROUP, disabled: 1, inherit:
1, enable_on_exec: 1, exclude_guest: 1 }, pid: 366178 (perf), cpu: -1,
group_fd: -1, flags: FD_CLOEXEC) = 12
0.182 ( 0.003 ms): perf/366177 perf_event_open(attr_uptr: { type:
4, size: 128, config: 0x8000, sample_type: IDENTIFIER, read_format:
TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING|ID|GROUP, inherit: 1,
exclude_guest: 1 }, pid: 366178 (perf), cpu: -1, group_fd: 12, flags:
FD_CLOEXEC) = 13

Tested-by: Ian Rogers <irogers@xxxxxxxxxx>

Thanks,
Ian

> ----
>
> Using BPF for that, doing a cleverish reuse of perf_event_attr__fprintf(),
> that really needs to be turned into __snprintf(), etc.
>
> But since the plan is to go the BTF way, we will probably use libbpf's
> btf_dump__dump_type_data().
>
> Example:
>
> [root@quaco ~]# perf trace -e ~acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.c,perf_event_open --max-events 10 perf stat --quiet sleep 0.001
> fg
> 0.000 perf_event_open(attr_uptr: { type: 1, size: 128, config: 0x1, sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 258859 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 3
> 0.067 perf_event_open(attr_uptr: { type: 1, size: 128, config: 0x3, sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 258859 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 4
> 0.120 perf_event_open(attr_uptr: { type: 1, size: 128, config: 0x4, sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 258859 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 5
> 0.172 perf_event_open(attr_uptr: { type: 1, size: 128, config: 0x2, sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 258859 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 7
> 0.190 perf_event_open(attr_uptr: { size: 128, sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 258859 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 8
> 0.199 perf_event_open(attr_uptr: { size: 128, config: 0x1, sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 258859 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 9
> 0.204 perf_event_open(attr_uptr: { size: 128, config: 0x4, sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 258859 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 10
> 0.210 perf_event_open(attr_uptr: { size: 128, config: 0x5, sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 258859 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 11
> [root@quaco ~]#
>
> Suggested-by: Ian Rogers <irogers@xxxxxxxxxx>
> Cc: Adrian Hunter <adrian.hunter@xxxxxxxxx>
> Cc: Jiri Olsa <jolsa@xxxxxxxxxx>
> Cc: Namhyung Kim <namhyung@xxxxxxxxxx>
> Signed-off-by: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
> ---
> tools/perf/builtin-trace.c | 3 +-
> .../examples/bpf/augmented_raw_syscalls.c | 44 +++++++++++++++++++
> tools/perf/trace/beauty/perf_event_open.c | 44 +++++++++++++++++++
> 3 files changed, 90 insertions(+), 1 deletion(-)
>
> diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
> index 72991528687ea60b..5690c33c523b37d7 100644
> --- a/tools/perf/builtin-trace.c
> +++ b/tools/perf/builtin-trace.c
> @@ -1053,7 +1053,8 @@ static struct syscall_fmt syscall_fmts[] = {
> .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ },
> [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
> { .name = "perf_event_open",
> - .arg = { [2] = { .scnprintf = SCA_INT, /* cpu */ },
> + .arg = { [0] = { .scnprintf = SCA_PERF_ATTR, /* attr */ },
> + [2] = { .scnprintf = SCA_INT, /* cpu */ },
> [3] = { .scnprintf = SCA_FD, /* group_fd */ },
> [4] = { .scnprintf = SCA_PERF_FLAGS, /* flags */ }, }, },
> { .name = "pipe2",
> diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c
> index 926238efd7d8d0df..0599823e8ae1b4c0 100644
> --- a/tools/perf/examples/bpf/augmented_raw_syscalls.c
> +++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c
> @@ -129,6 +129,7 @@ struct augmented_args_payload {
> struct augmented_arg arg, arg2;
> };
> struct sockaddr_storage saddr;
> + char __data[sizeof(struct augmented_arg)];
> };
> };
>
> @@ -293,6 +294,49 @@ int sys_enter_renameat(struct syscall_enter_args *args)
> return augmented__output(args, augmented_args, len);
> }
>
> +#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */
> +
> +// we need just the start, get the size to then copy it
> +struct perf_event_attr_size {
> + __u32 type;
> + /*
> + * Size of the attr structure, for fwd/bwd compat.
> + */
> + __u32 size;
> +};
> +
> +SEC("!syscalls:sys_enter_perf_event_open")
> +int sys_enter_perf_event_open(struct syscall_enter_args *args)
> +{
> + struct augmented_args_payload *augmented_args = augmented_args_payload();
> + const struct perf_event_attr_size *attr = (const struct perf_event_attr_size *)args->args[0], *attr_read;
> + unsigned int len = sizeof(augmented_args->args);
> +
> + if (augmented_args == NULL)
> + goto failure;
> +
> + if (bpf_probe_read(&augmented_args->__data, sizeof(*attr), attr) < 0)
> + goto failure;
> +
> + attr_read = (const struct perf_event_attr_size *)augmented_args->__data;
> +
> + __u32 size = attr_read->size;
> +
> + if (!size)
> + size = PERF_ATTR_SIZE_VER0;
> +
> + if (size > sizeof(augmented_args->__data))
> + goto failure;
> +
> + // Now that we read attr->size and tested it against the size limits, read it completely
> + if (bpf_probe_read(&augmented_args->__data, size, attr) < 0)
> + goto failure;
> +
> + return augmented__output(args, augmented_args, len + size);
> +failure:
> + return 1; /* Failure: don't filter */
> +}
> +
> static pid_t getpid(void)
> {
> return bpf_get_current_pid_tgid();
> diff --git a/tools/perf/trace/beauty/perf_event_open.c b/tools/perf/trace/beauty/perf_event_open.c
> index 11d47dbe63bd1edc..01ee15fe9d0c7a98 100644
> --- a/tools/perf/trace/beauty/perf_event_open.c
> +++ b/tools/perf/trace/beauty/perf_event_open.c
> @@ -44,3 +44,47 @@ static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
> }
>
> #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
> +
> +struct attr_fprintf_args {
> + size_t size, printed;
> + char *bf;
> + bool first;
> +};
> +
> +static int attr__fprintf(FILE *fp __maybe_unused, const char *name, const char *val, void *priv)
> +{
> + struct attr_fprintf_args *args = priv;
> + size_t printed = scnprintf(args->bf + args->printed , args->size - args->printed, "%s%s: %s", args->first ? "" : ", ", name, val);
> +
> + args->first = false;
> + args->printed += printed;
> + return printed;
> +}
> +
> +static size_t perf_event_attr___scnprintf(struct perf_event_attr *attr, char *bf, size_t size, bool show_zeros __maybe_unused)
> +{
> + struct attr_fprintf_args args = {
> + .printed = scnprintf(bf, size, "{ "),
> + .size = size,
> + .first = true,
> + .bf = bf,
> + };
> +
> + perf_event_attr__fprintf(stdout, attr, attr__fprintf, &args);
> + return args.printed + scnprintf(bf + args.printed, size - args.printed, " }");
> +}
> +
> +static size_t syscall_arg__scnprintf_augmented_perf_event_attr(struct syscall_arg *arg, char *bf, size_t size)
> +{
> + return perf_event_attr___scnprintf((void *)arg->augmented.args, bf, size, arg->trace->show_zeros);
> +}
> +
> +static size_t syscall_arg__scnprintf_perf_event_attr(char *bf, size_t size, struct syscall_arg *arg)
> +{
> + if (arg->augmented.args)
> + return syscall_arg__scnprintf_augmented_perf_event_attr(arg, bf, size);
> +
> + return scnprintf(bf, size, "%#lx", arg->val);
> +}
> +
> +#define SCA_PERF_ATTR syscall_arg__scnprintf_perf_event_attr
> --
> 2.37.3
>