[PATCH 5/5] perf ftrace: Implement cpu and task filters in BPF

From: Namhyung Kim
Date: Mon Nov 29 2021 - 18:18:49 EST


Honor the cpu and task options by setting up the corresponding filters
(tasks are matched by pid or tid) in the BPF program. For example, the
following command shows the latency of mutex_lock for process 2570.

# perf ftrace latency -b -T mutex_lock -p 2570 sleep 3
#   DURATION     |      COUNT | GRAPH                          |
     0 - 1    us |        675 | ############################## |
     1 - 2    us |          9 |                                |
     2 - 4    us |          0 |                                |
     4 - 8    us |          0 |                                |
     8 - 16   us |          0 |                                |
    16 - 32   us |          0 |                                |
    32 - 64   us |          0 |                                |
    64 - 128  us |          0 |                                |
   128 - 256  us |          0 |                                |
   256 - 512  us |          0 |                                |
   512 - 1024 us |          0 |                                |
     1 - 2    ms |          0 |                                |
     2 - 4    ms |          0 |                                |
     4 - 8    ms |          0 |                                |
     8 - 16   ms |          0 |                                |
    16 - 32   ms |          0 |                                |
    32 - 64   ms |          0 |                                |
    64 - 128  ms |          0 |                                |
   128 - 256  ms |          0 |                                |
   256 - 512  ms |          0 |                                |
   512 - 1024 ms |          0 |                                |
     1 - ...   s |          0 |                                |

Signed-off-by: Namhyung Kim <namhyung@xxxxxxxxxx>
---
tools/perf/util/bpf_ftrace.c | 41 +++++++++++++++++++++
tools/perf/util/bpf_skel/func_latency.bpf.c | 21 +++++++++++
2 files changed, 62 insertions(+)

diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c
index 1975a6fe73c9..18d6f1558163 100644
--- a/tools/perf/util/bpf_ftrace.c
+++ b/tools/perf/util/bpf_ftrace.c
@@ -6,7 +6,10 @@
#include <linux/err.h>

#include "util/ftrace.h"
+#include "util/cpumap.h"
+#include "util/thread_map.h"
#include "util/debug.h"
+#include "util/evlist.h"
#include "util/bpf_counter.h"

#include "util/bpf_skel/func_latency.skel.h"
@@ -16,6 +19,7 @@ static struct func_latency_bpf *skel;
int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
{
int fd, err;
+ int i, ncpus = 1, ntasks = 1;
struct filter_entry *func;
struct bpf_link *begin_link, *end_link;

@@ -33,6 +37,17 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
return -1;
}

+ /* don't need to set cpu filter for system-wide mode */
+ if (ftrace->target.cpu_list) {
+ ncpus = perf_cpu_map__nr(ftrace->evlist->core.cpus);
+ bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
+ }
+
+ if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) {
+ ntasks = perf_thread_map__nr(ftrace->evlist->core.threads);
+ bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
+ }
+
set_max_rlimit();

err = func_latency_bpf__load(skel);
@@ -41,6 +56,32 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
goto out;
}

+ if (ftrace->target.cpu_list) {
+ u32 cpu;
+ u8 val = 1;
+
+ skel->bss->has_cpu = 1;
+ fd = bpf_map__fd(skel->maps.cpu_filter);
+
+ for (i = 0; i < ncpus; i++) {
+ cpu = cpu_map__cpu(ftrace->evlist->core.cpus, i);
+ bpf_map_update_elem(fd, &cpu, &val, BPF_ANY);
+ }
+ }
+
+ if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) {
+ u32 pid;
+ u8 val = 1;
+
+ skel->bss->has_task = 1;
+ fd = bpf_map__fd(skel->maps.task_filter);
+
+ for (i = 0; i < ntasks; i++) {
+ pid = perf_thread_map__pid(ftrace->evlist->core.threads, i);
+ bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
+ }
+ }
+
begin_link = bpf_program__attach_kprobe(skel->progs.func_begin,
false, func->name);
if (IS_ERR(begin_link)) {
diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c
index d7d31cfeabf8..17a2a53b4f16 100644
--- a/tools/perf/util/bpf_skel/func_latency.bpf.c
+++ b/tools/perf/util/bpf_skel/func_latency.bpf.c
@@ -36,6 +36,8 @@ struct {


int enabled = 0;
+int has_cpu = 0;
+int has_task = 0;

SEC("kprobe/func")
int BPF_PROG(func_begin)
@@ -46,6 +48,25 @@ int BPF_PROG(func_begin)
return 0;

key = bpf_get_current_pid_tgid();
+
+ if (has_cpu) {
+ __u32 cpu = bpf_get_smp_processor_id();
+ __u8 *ok;
+
+ ok = bpf_map_lookup_elem(&cpu_filter, &cpu);
+ if (!ok)
+ return 0;
+ }
+
+ if (has_task) {
+ __u32 pid = key & 0xffffffff;
+ __u8 *ok;
+
+ ok = bpf_map_lookup_elem(&task_filter, &pid);
+ if (!ok)
+ return 0;
+ }
+
now = bpf_ktime_get_ns();

// overwrite timestamp for nested functions
--
2.34.0.rc2.393.gf8c9666880-goog