Re: [PATCH v2 2/2] perf-stat: enable counting events for BPF programs

From: Jiri Olsa
Date: Mon Dec 07 2020 - 17:08:44 EST


On Thu, Dec 03, 2020 at 10:13:10PM -0800, Song Liu wrote:

SNIP

> +#include "bpf_skel/bpf_prog_profiler.skel.h"
> +
> +static inline void *u64_to_ptr(__u64 ptr)
> +{
> + return (void *)(unsigned long)ptr;
> +}
> +
> +static void set_max_rlimit(void)
> +{
> + struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
> +
> + setrlimit(RLIMIT_MEMLOCK, &rinf);
> +}
> +
> +static inline struct bpf_counter *bpf_counter_alloc(void)

why is this inlined?

SNIP

> +static int bpf_program_profiler_load_one(struct evsel *evsel, u32 prog_id)
> +{
> + struct bpf_prog_profiler_bpf *skel;
> + struct bpf_counter *counter;
> + struct bpf_program *prog;
> + char *prog_name;
> + int prog_fd;
> + int err;
> +
> + prog_fd = bpf_prog_get_fd_by_id(prog_id);
> + if (prog_fd < 0) {
> + pr_debug("Failed to open fd for bpf prog %u\n", prog_id);
> + return -1;
> + }
> + counter = bpf_counter_alloc();
> + if (!counter)
> + return -1;
> +
> + skel = bpf_prog_profiler_bpf__open();
> + if (!skel) {
> + pr_debug("Failed to load bpf skeleton\n");

I'm still getting

[root@dell-r440-01 perf]# ./perf stat -b 38
libbpf: elf: skipping unrecognized data section(9) .eh_frame
libbpf: elf: skipping relo section(15) .rel.eh_frame for section(9) .eh_frame
libbpf: XXX is not found in vmlinux BTF
libbpf: failed to load object 'bpf_prog_profiler_bpf'
libbpf: failed to load BPF skeleton 'bpf_prog_profiler_bpf': -2
...

with id 38 being:

38: tracepoint name sys_enter tag 03418b72a610af75 gpl
loaded_at 2020-12-07T22:54:05+0100 uid 0
xlated 272B jited 153B memlock 4096B map_ids 1

how is this supposed to work when the program's section
name contains XXX? libbpf tries to find XXX in the kernel
BTF and, of course, fails


> + free(counter);
> + return -1;
> + }
> + skel->rodata->num_cpu = evsel__nr_cpus(evsel);
> +
> + bpf_map__resize(skel->maps.events, evsel__nr_cpus(evsel));
> + bpf_map__resize(skel->maps.fentry_readings, 1);
> + bpf_map__resize(skel->maps.accum_readings, 1);
> +

SNIP

> +static int bpf_program_profiler__read(struct evsel *evsel)
> +{
> + int num_cpu = evsel__nr_cpus(evsel);
> + struct bpf_perf_event_value values[num_cpu];
> + struct bpf_counter *counter;
> + int reading_map_fd;
> + __u32 key = 0;
> + int err, cpu;
> +
> + if (list_empty(&evsel->bpf_counter_list))
> + return -EAGAIN;
> +
> + for (cpu = 0; cpu < num_cpu; cpu++) {
> + perf_counts(evsel->counts, cpu, 0)->val = 0;
> + perf_counts(evsel->counts, cpu, 0)->ena = 0;
> + perf_counts(evsel->counts, cpu, 0)->run = 0;
> + }
> + list_for_each_entry(counter, &evsel->bpf_counter_list, list) {
> + struct bpf_prog_profiler_bpf *skel = counter->skel;
> +
> + reading_map_fd = bpf_map__fd(skel->maps.accum_readings);
> +
> + err = bpf_map_lookup_elem(reading_map_fd, &key, values);
> + if (err) {
> + fprintf(stderr, "failed to read value\n");
> + return err;
> + }
> +
> + for (cpu = 0; cpu < num_cpu; cpu++) {
> + perf_counts(evsel->counts, cpu, 0)->val += values[cpu].counter;
> + perf_counts(evsel->counts, cpu, 0)->ena += values[cpu].enabled;
> + perf_counts(evsel->counts, cpu, 0)->run += values[cpu].running;
> + }

so we sum everything up across all the provided bpf IDs;
should we count/display them separately instead?

SNIP

> +SEC("fentry/XXX")
> +int BPF_PROG(fentry_XXX)
> +{
> + __u32 key = bpf_get_smp_processor_id();
> + struct bpf_perf_event_value reading;
> + struct bpf_perf_event_value *ptr;
> + __u32 zero = 0;
> + long err;
> +
> + /* look up before reading, to reduce error */
> + ptr = bpf_map_lookup_elem(&fentry_readings, &zero);
> + if (!ptr)
> + return 0;
> +
> + err = bpf_perf_event_read_value(&events, key, &reading,
> + sizeof(reading));

can't we read directly into ptr here?

SNIP

> /* THREAD and SYSTEM/CPU are mutually exclusive */
> if (target->per_thread && (target->system_wide || target->cpu_list)) {
> target->per_thread = false;
> @@ -109,6 +137,10 @@ static const char *target__error_str[] = {
> "PID/TID switch overriding SYSTEM",
> "UID switch overriding SYSTEM",
> "SYSTEM/CPU switch overriding PER-THREAD",
> + "BPF switch overriding CPU",
> + "BPF switch overriding PID/TID",
> + "BPF switch overriding UID",
> + "BPF switch overriding THREAD",
> "Invalid User: %s",
> "Problems obtaining information for user %s",
> };
> @@ -134,7 +166,7 @@ int target__strerror(struct target *target, int errnum,
>
> switch (errnum) {
> case TARGET_ERRNO__PID_OVERRIDE_CPU ...
> - TARGET_ERRNO__SYSTEM_OVERRIDE_THREAD:

hum, this should stay, no?

thanks,
jirka

> + TARGET_ERRNO__BPF_OVERRIDE_THREAD:
> snprintf(buf, buflen, "%s", msg);
> break;
>
> diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h

SNIP