Re: [PATCH bpf-next] bpf: make update_prog_stats always_inline
From: Alexei Starovoitov
Date: Wed Jun 18 2025 - 14:10:07 EST
On Wed, Jun 18, 2025 at 11:06 AM Alexei Starovoitov
<alexei.starovoitov@xxxxxxxxx> wrote:
>
> On Wed, Jun 18, 2025 at 1:58 AM Menglong Dong <menglong8.dong@xxxxxxxxx> wrote:
> >
> > The function update_prog_stats() is called from the bpf trampoline.
> > Make it always_inline to reduce the call overhead.
>
> What kind of difference did you measure?
>
> > Signed-off-by: Menglong Dong <dongml2@xxxxxxxxxxxxxxx>
> > ---
> > kernel/bpf/trampoline.c | 4 ++--
> > 1 file changed, 2 insertions(+), 2 deletions(-)
> >
> > diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
> > index c4b1a98ff726..134bcfd00b15 100644
> > --- a/kernel/bpf/trampoline.c
> > +++ b/kernel/bpf/trampoline.c
> > @@ -911,8 +911,8 @@ static u64 notrace __bpf_prog_enter_recur(struct bpf_prog *prog, struct bpf_tram
> >         return bpf_prog_start_time();
> >  }
> >
> > -static void notrace update_prog_stats(struct bpf_prog *prog,
> > -                                      u64 start)
> > +static __always_inline void notrace update_prog_stats(struct bpf_prog *prog,
> > +                                                      u64 start)
> >  {
>
> How about the following instead:
> diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
> index c4b1a98ff726..728bb2845f41 100644
> --- a/kernel/bpf/trampoline.c
> +++ b/kernel/bpf/trampoline.c
> @@ -911,28 +911,23 @@ static u64 notrace __bpf_prog_enter_recur(struct bpf_prog *prog, struct bpf_tram
>         return bpf_prog_start_time();
>  }
>
> -static void notrace update_prog_stats(struct bpf_prog *prog,
> -                                      u64 start)
> +static noinline void notrace __update_prog_stats(struct bpf_prog *prog,
> +                                                 u64 start)
>  {
>         struct bpf_prog_stats *stats;
> -
> -       if (static_branch_unlikely(&bpf_stats_enabled_key) &&
> -           /* static_key could be enabled in __bpf_prog_enter*
> -            * and disabled in __bpf_prog_exit*.
> -            * And vice versa.
> -            * Hence check that 'start' is valid.
> -            */
> -           start > NO_START_TIME) {
> -               u64 duration = sched_clock() - start;
> -               unsigned long flags;
> -
> -               stats = this_cpu_ptr(prog->stats);
> -               flags = u64_stats_update_begin_irqsave(&stats->syncp);
> -               u64_stats_inc(&stats->cnt);
> -               u64_stats_add(&stats->nsecs, duration);
> -               u64_stats_update_end_irqrestore(&stats->syncp, flags);
> -       }
> +       u64 duration = sched_clock() - start;
> +       unsigned long flags;
> +
> +       stats = this_cpu_ptr(prog->stats);
> +       flags = u64_stats_update_begin_irqsave(&stats->syncp);
> +       u64_stats_inc(&stats->cnt);
> +       u64_stats_add(&stats->nsecs, duration);
> +       u64_stats_update_end_irqrestore(&stats->syncp, flags);
>  }
> +#define update_prog_stats(prog, start) \
> +       if (static_branch_unlikely(&bpf_stats_enabled_key) && \
> +           start > NO_START_TIME) \
> +               __update_prog_stats(prog, start)

Or make this one an always_inline function of two lines
and don't bother with a macro.
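
A sketch of that always_inline variant (untested, reusing only the
identifiers from the diff above):

static __always_inline void notrace update_prog_stats(struct bpf_prog *prog,
                                                      u64 start)
{
        /* static_key could be enabled in __bpf_prog_enter*
         * and disabled in __bpf_prog_exit*. And vice versa.
         * Hence check that 'start' is valid.
         */
        if (static_branch_unlikely(&bpf_stats_enabled_key) &&
            start > NO_START_TIME)
                __update_prog_stats(prog, start);
}

Either way the code inlined into the trampoline shrinks to the
static-branch test, while the u64_stats update stays out of line in
__update_prog_stats().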
>
>  static void notrace __bpf_prog_exit_recur(struct bpf_prog *prog, u64 start,
>                                            struct bpf_tramp_run_ctx *run_ctx)
>
>
> Maybe
> if (start > NO_START_TIME)
> should stay within __update_prog_stats().
>
> pls run a few experiments.
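
For that last option, a rough sketch (untested) with the start check
moved into the noinline helper, so the inlined caller only tests the
static branch:

static noinline void notrace __update_prog_stats(struct bpf_prog *prog,
                                                 u64 start)
{
        struct bpf_prog_stats *stats;
        u64 duration;
        unsigned long flags;

        /* static_key could be enabled in __bpf_prog_enter*
         * and disabled in __bpf_prog_exit*. And vice versa.
         * Hence check that 'start' is valid.
         */
        if (start <= NO_START_TIME)
                return;

        duration = sched_clock() - start;
        stats = this_cpu_ptr(prog->stats);
        flags = u64_stats_update_begin_irqsave(&stats->syncp);
        u64_stats_inc(&stats->cnt);
        u64_stats_add(&stats->nsecs, duration);
        u64_stats_update_end_irqrestore(&stats->syncp, flags);
}

static __always_inline void notrace update_prog_stats(struct bpf_prog *prog,
                                                      u64 start)
{
        if (static_branch_unlikely(&bpf_stats_enabled_key))
                __update_prog_stats(prog, start);
}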