Re: [PATCH] x86/nmi: remove the irqwork from long duration nmi handler

From: Changbin Du
Date: Tue Jan 07 2020 - 09:42:03 EST


Hi, Thomas,
Have you had a chance to look at this one? I think it can even be considered a fix.

On Wed, Jan 01, 2020 at 03:20:17PM +0800, Changbin Du wrote:
> First, printk is now safe to call from NMI context, since the NMI-safe
> printk has been implemented. The safe printk already uses an irqwork to
> make printing from NMI context safe.
>
> Second, the NMI irqwork does not actually work if an NMI handler causes a
> panic by watchdog timeout. The NMI irqwork has no chance to run in such a
> case, while the safe printk will flush its per-cpu buffer before the panic.
>
> Signed-off-by: Changbin Du <changbin.du@xxxxxxxxx>
> ---
> arch/x86/include/asm/nmi.h | 1 -
> arch/x86/kernel/nmi.c | 20 +++++++++-----------
> 2 files changed, 9 insertions(+), 12 deletions(-)
>
> diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
> index 75ded1d13d98..9d5d949e662e 100644
> --- a/arch/x86/include/asm/nmi.h
> +++ b/arch/x86/include/asm/nmi.h
> @@ -41,7 +41,6 @@ struct nmiaction {
> struct list_head list;
> nmi_handler_t handler;
> u64 max_duration;
> - struct irq_work irq_work;
> unsigned long flags;
> const char *name;
> };
> diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
> index e676a9916c49..aa15d4f2340f 100644
> --- a/arch/x86/kernel/nmi.c
> +++ b/arch/x86/kernel/nmi.c
> @@ -104,18 +104,22 @@ static int __init nmi_warning_debugfs(void)
> }
> fs_initcall(nmi_warning_debugfs);
>
> -static void nmi_max_handler(struct irq_work *w)
> +static void nmi_check_duration(struct nmiaction *action, u64 duration)
> {
> - struct nmiaction *a = container_of(w, struct nmiaction, irq_work);
> int remainder_ns, decimal_msecs;
> - u64 whole_msecs = READ_ONCE(a->max_duration);
> + u64 whole_msecs = READ_ONCE(action->max_duration);
> +
> + if (duration < nmi_longest_ns || duration < action->max_duration)
> + return;
> +
> + action->max_duration = duration;
>
> remainder_ns = do_div(whole_msecs, (1000 * 1000));
> decimal_msecs = remainder_ns / 1000;
>
> printk_ratelimited(KERN_INFO
> "INFO: NMI handler (%ps) took too long to run: %lld.%03d msecs\n",
> - a->handler, whole_msecs, decimal_msecs);
> + action->handler, whole_msecs, decimal_msecs);
> }
>
> static int nmi_handle(unsigned int type, struct pt_regs *regs)
> @@ -142,11 +146,7 @@ static int nmi_handle(unsigned int type, struct pt_regs *regs)
> delta = sched_clock() - delta;
> trace_nmi_handler(a->handler, (int)delta, thishandled);
>
> - if (delta < nmi_longest_ns || delta < a->max_duration)
> - continue;
> -
> - a->max_duration = delta;
> - irq_work_queue(&a->irq_work);
> + nmi_check_duration(a, delta);
> }
>
> rcu_read_unlock();
> @@ -164,8 +164,6 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action)
> if (!action->handler)
> return -EINVAL;
>
> - init_irq_work(&action->irq_work, nmi_max_handler);
> -
> raw_spin_lock_irqsave(&desc->lock, flags);
>
> /*
> --
> 2.24.0
>

--
Cheers,
Changbin Du