Re: [PATCH v4 05/27] x86: Replace ist_enter() with nmi_enter()

From: Masami Hiramatsu
Date: Fri Mar 06 2020 - 20:54:00 EST


Hi Peter,

On Wed, 26 Feb 2020 11:27:58 +0100
Peter Zijlstra <peterz@xxxxxxxxxxxxx> wrote:

> On Mon, Feb 24, 2020 at 05:02:31PM -0500, Steven Rostedt wrote:
>
> > The other is for the hwlat detector that measures the time it was in an
> > NMI, as NMIs appear as a hardware latency too.
>
> Yeah,.. I hate that one. But I ended up with this patch.
>
> And yes, I know some of those notrace annotations are strictly
> unnecessary due to Makefile crap, but having them is _SO_ much easier.
>
> ---
> Subject: x86,tracing: Robustify ftrace_nmi_enter()
> From: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
> Date: Mon Feb 24 23:40:29 CET 2020
>
> ftrace_nmi_enter()
> trace_hwlat_callback()
> trace_clock_local()
> sched_clock()
> paravirt_sched_clock()
> native_sched_clock()
>
> None of these may be traced or kprobed; they will be called from
> do_debug() before the kprobe handler.

As I found today, we need NOKPROBE annotations on the exit side too, and
this patch covers the exit side.

Reviewed-by: Masami Hiramatsu <mhiramat@xxxxxxxxxx>

Thank you,


>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
> ---
> arch/x86/include/asm/paravirt.h | 2 +-
> arch/x86/kernel/tsc.c | 7 +++++--
> include/linux/ftrace_irq.h | 4 ++--
> kernel/trace/trace_clock.c | 2 ++
> kernel/trace/trace_hwlat.c | 4 +++-
> 5 files changed, 13 insertions(+), 6 deletions(-)
>
> --- a/arch/x86/include/asm/paravirt.h
> +++ b/arch/x86/include/asm/paravirt.h
> @@ -17,7 +17,7 @@
> #include <linux/cpumask.h>
> #include <asm/frame.h>
>
> -static inline unsigned long long paravirt_sched_clock(void)
> +static __always_inline unsigned long long paravirt_sched_clock(void)
> {
> return PVOP_CALL0(unsigned long long, time.sched_clock);
> }
> --- a/arch/x86/kernel/tsc.c
> +++ b/arch/x86/kernel/tsc.c
> @@ -14,6 +14,7 @@
> #include <linux/percpu.h>
> #include <linux/timex.h>
> #include <linux/static_key.h>
> +#include <linux/kprobes.h>
>
> #include <asm/hpet.h>
> #include <asm/timer.h>
> @@ -207,7 +208,7 @@ static void __init cyc2ns_init_secondary
> /*
> * Scheduler clock - returns current time in nanosec units.
> */
> -u64 native_sched_clock(void)
> +notrace u64 native_sched_clock(void)
> {
> if (static_branch_likely(&__use_tsc)) {
> u64 tsc_now = rdtsc();
> @@ -228,6 +229,7 @@ u64 native_sched_clock(void)
> /* No locking but a rare wrong value is not a big deal: */
> return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
> }
> +NOKPROBE_SYMBOL(native_sched_clock);
>
> /*
> * Generate a sched_clock if you already have a TSC value.
> @@ -240,10 +242,11 @@ u64 native_sched_clock_from_tsc(u64 tsc)
> /* We need to define a real function for sched_clock, to override the
> weak default version */
> #ifdef CONFIG_PARAVIRT
> -unsigned long long sched_clock(void)
> +notrace unsigned long long sched_clock(void)
> {
> return paravirt_sched_clock();
> }
> +NOKPROBE_SYMBOL(sched_clock);
>
> bool using_native_sched_clock(void)
> {
> --- a/include/linux/ftrace_irq.h
> +++ b/include/linux/ftrace_irq.h
> @@ -7,7 +7,7 @@ extern bool trace_hwlat_callback_enabled
> extern void trace_hwlat_callback(bool enter);
> #endif
>
> -static inline void ftrace_nmi_enter(void)
> +static __always_inline void ftrace_nmi_enter(void)
> {
> #ifdef CONFIG_HWLAT_TRACER
> if (trace_hwlat_callback_enabled)
> @@ -15,7 +15,7 @@ static inline void ftrace_nmi_enter(void
> #endif
> }
>
> -static inline void ftrace_nmi_exit(void)
> +static __always_inline void ftrace_nmi_exit(void)
> {
> #ifdef CONFIG_HWLAT_TRACER
> if (trace_hwlat_callback_enabled)
> --- a/kernel/trace/trace_clock.c
> +++ b/kernel/trace/trace_clock.c
> @@ -22,6 +22,7 @@
> #include <linux/sched/clock.h>
> #include <linux/ktime.h>
> #include <linux/trace_clock.h>
> +#include <linux/kprobes.h>
>
> /*
> * trace_clock_local(): the simplest and least coherent tracing clock.
> @@ -44,6 +45,7 @@ u64 notrace trace_clock_local(void)
>
> return clock;
> }
> +NOKPROBE_SYMBOL(trace_clock_local);
> EXPORT_SYMBOL_GPL(trace_clock_local);
>
> /*
> --- a/kernel/trace/trace_hwlat.c
> +++ b/kernel/trace/trace_hwlat.c
> @@ -43,6 +43,7 @@
> #include <linux/cpumask.h>
> #include <linux/delay.h>
> #include <linux/sched/clock.h>
> +#include <linux/kprobes.h>
> #include "trace.h"
>
> static struct trace_array *hwlat_trace;
> @@ -137,7 +138,7 @@ static void trace_hwlat_sample(struct hw
> #define init_time(a, b) (a = b)
> #define time_u64(a) a
>
> -void trace_hwlat_callback(bool enter)
> +notrace void trace_hwlat_callback(bool enter)
> {
> if (smp_processor_id() != nmi_cpu)
> return;
> @@ -156,6 +157,7 @@ void trace_hwlat_callback(bool enter)
> if (enter)
> nmi_count++;
> }
> +NOKPROBE_SYMBOL(trace_hwlat_callback);
>
> /**
> * get_sample - sample the CPU TSC and look for likely hardware latencies


--
Masami Hiramatsu <mhiramat@xxxxxxxxxx>