Re: [RFC bpf-next 3/4] bpf: Resolve symbols with kallsyms_lookup_names for kprobe multi link

From: Alexei Starovoitov
Date: Fri Apr 08 2022 - 19:26:20 EST


On Thu, Apr 07, 2022 at 02:52:23PM +0200, Jiri Olsa wrote:
> Using kallsyms_lookup_names function to speed up symbols lookup in
> kprobe multi link attachment and replacing with it the current
> kprobe_multi_resolve_syms function.
>
> This speeds up bpftrace kprobe attachment:
>
> # perf stat -r 5 -e cycles ./src/bpftrace -e 'kprobe:x* { } i:ms:1 { exit(); }'
> ...
> 6.5681 +- 0.0225 seconds time elapsed ( +- 0.34% )
>
> After:
>
> # perf stat -r 5 -e cycles ./src/bpftrace -e 'kprobe:x* { } i:ms:1 { exit(); }'
> ...
> 0.5661 +- 0.0275 seconds time elapsed ( +- 4.85% )
>
> Signed-off-by: Jiri Olsa <jolsa@xxxxxxxxxx>
> ---
> kernel/trace/bpf_trace.c | 123 +++++++++++++++++++++++----------------
> 1 file changed, 73 insertions(+), 50 deletions(-)
>
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index b26f3da943de..2602957225ba 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -2226,6 +2226,72 @@ struct bpf_kprobe_multi_run_ctx {
> unsigned long entry_ip;
> };
>
> +struct user_syms {
> + const char **syms;
> + char *buf;
> +};
> +
> +static int copy_user_syms(struct user_syms *us, void __user *usyms, u32 cnt)
> +{
> + const char __user **usyms_copy = NULL;
> + const char **syms = NULL;
> + char *buf = NULL, *p;
> + int err = -EFAULT;
> + unsigned int i;
> + size_t size;
> +
> + size = cnt * sizeof(*usyms_copy);
> +
> + usyms_copy = kvmalloc(size, GFP_KERNEL);
> + if (!usyms_copy)
> + return -ENOMEM;
> +
> + if (copy_from_user(usyms_copy, usyms, size))
> + goto error;
> +
> + err = -ENOMEM;
> + syms = kvmalloc(size, GFP_KERNEL);
> + if (!syms)
> + goto error;
> +
> + /* TODO this potentially allocates lot of memory (~6MB in my tests
> + * with attaching ~40k functions). I haven't seen this to fail yet,
> + * but it could be changed to allocate memory gradually if needed.
> + */

Why would 6MB kvmalloc fail?
If we don't have such memory the kernel will be ooming soon anyway.
I don't think we'll see this kvmalloc triggering oom in practice.
The verifier allocates a lot more memory to check large programs.

imo this approach is fine. It's simple.
Trying to do gradual alloc with realloc would be just guessing.

Another option would be to ask user space (libbpf) to do the sort.
There are pros and cons.
This vmalloc+sort is slightly better imo.

> + size = cnt * KSYM_NAME_LEN;
> + buf = kvmalloc(size, GFP_KERNEL);
> + if (!buf)
> + goto error;
> +
> + for (p = buf, i = 0; i < cnt; i++) {
> + err = strncpy_from_user(p, usyms_copy[i], KSYM_NAME_LEN);
> + if (err == KSYM_NAME_LEN)
> + err = -E2BIG;
> + if (err < 0)
> + goto error;
> + syms[i] = p;
> + p += err + 1;
> + }
> +
> + err = 0;
> + us->syms = syms;
> + us->buf = buf;
> +
> +error:
> + kvfree(usyms_copy);
> + if (err) {
> + kvfree(syms);
> + kvfree(buf);
> + }
> + return err;
> +}
> +
> +static void free_user_syms(struct user_syms *us)
> +{
> + kvfree(us->syms);
> + kvfree(us->buf);
> +}
> +
> static void bpf_kprobe_multi_link_release(struct bpf_link *link)
> {
> struct bpf_kprobe_multi_link *kmulti_link;
> @@ -2346,55 +2412,6 @@ kprobe_multi_link_handler(struct fprobe *fp, unsigned long entry_ip,
> kprobe_multi_link_prog_run(link, entry_ip, regs);
> }
>
> -static int
> -kprobe_multi_resolve_syms(const void __user *usyms, u32 cnt,
> - unsigned long *addrs)
> -{
> - unsigned long addr, size;
> - const char __user **syms;
> - int err = -ENOMEM;
> - unsigned int i;
> - char *func;
> -
> - size = cnt * sizeof(*syms);
> - syms = kvzalloc(size, GFP_KERNEL);
> - if (!syms)
> - return -ENOMEM;
> -
> - func = kmalloc(KSYM_NAME_LEN, GFP_KERNEL);
> - if (!func)
> - goto error;
> -
> - if (copy_from_user(syms, usyms, size)) {
> - err = -EFAULT;
> - goto error;
> - }
> -
> - for (i = 0; i < cnt; i++) {
> - err = strncpy_from_user(func, syms[i], KSYM_NAME_LEN);
> - if (err == KSYM_NAME_LEN)
> - err = -E2BIG;
> - if (err < 0)
> - goto error;
> - err = -EINVAL;
> - addr = kallsyms_lookup_name(func);
> - if (!addr)
> - goto error;
> - if (!kallsyms_lookup_size_offset(addr, &size, NULL))
> - goto error;
> - addr = ftrace_location_range(addr, addr + size - 1);
> - if (!addr)
> - goto error;
> - addrs[i] = addr;
> - }
> -
> - err = 0;
> -error:
> - kvfree(syms);
> - kfree(func);
> - return err;
> -}
> -
> int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
> {
> struct bpf_kprobe_multi_link *link = NULL;
> @@ -2438,7 +2455,13 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
> goto error;
> }
> } else {
> - err = kprobe_multi_resolve_syms(usyms, cnt, addrs);
> + struct user_syms us;
> +
> + err = copy_user_syms(&us, usyms, cnt);
> + if (err)
> + goto error;
> + err = kallsyms_lookup_names(us.syms, cnt, addrs);
> + free_user_syms(&us);
> if (err)
> goto error;
> }
> --
> 2.35.1
>