Re: [PATCH -tip] kgdb, x86: Pull up NMI notifier handler priority

From: Dongdong Deng
Date: Wed Mar 23 2011 - 23:30:31 EST


On Thu, Mar 24, 2011 at 4:32 AM, Cyrill Gorcunov <gorcunov@xxxxxxxxxx> wrote:
> kgdb needs IPI to be sent and handled before perf
> or anything else NMI, otherwise kgdb hangs with bootup
> self-tests (found on P4 HT SMP machine). Raise its priority
> so that we're called first in a notifier chain.
>
> Reported-by: Don Zickus <dzickus@xxxxxxxxxx>
> Tested-by: Lin Ming <ming.m.lin@xxxxxxxxx>
> CC: Jason Wessel <jason.wessel@xxxxxxxxxxxxx>
> Signed-off-by: Cyrill Gorcunov <gorcunov@xxxxxxxxxx>
> ---
>
> Don, Jason, take a look please.
>
> Âarch/x86/kernel/kgdb.c | Â Â7 +++++--
> Â1 file changed, 5 insertions(+), 2 deletions(-)
>
> Index: linux-2.6.git/arch/x86/kernel/kgdb.c
> =====================================================================
> --- linux-2.6.git.orig/arch/x86/kernel/kgdb.c
> +++ linux-2.6.git/arch/x86/kernel/kgdb.c
> @@ -592,9 +592,12 @@ static struct notifier_block kgdb_notifi
> Â Â Â Â.notifier_call Â= kgdb_notify,
>
> Â Â Â Â/*
> - Â Â Â Â* Lowest-prio notifier priority, we want to be notified last:
> + Â Â Â Â* We might need to send an IPI and
> + Â Â Â Â* do cpu roundup before anything else
> + Â Â Â Â* in notifier chain so high priority
> + Â Â Â Â* is needed.
> Â Â Â Â */
> -    .priority    = NMI_LOCAL_LOW_PRIOR,
> +    .priority    = NMI_LOCAL_HIGH_PRIOR,
> Â};

CC: kgdb maillist.

I quote Jason's early email to explain why debugger tends to
set the low level of die_notifier.

"The original thinking was that if you are using a low level debugger
that you would want to stop on such a event or breakpoint because there
is nothing else handling it and your system is about to print an oops
message."

For keeping above purpose, I have a "ugly" proposal that splitting
kgdb die handling to two parts.

1: The first part with HIGH priority and just handle NMI event,
if the NMI event is not belong kgdb, it return NOTIFY_DONE and pass to others.

2: The second part with low priority to handling others.

Due to above handling logic could let kgdb source get complexly, I
couldn't make sure
it is a suitable method here, if it is OK, I can send a formal patch. :-)


$git diff
-----------
arch/x86/kernel/kgdb.c | 67 +++++++++++++++++++++++++++++++++++------------
1 files changed, 50 insertions(+), 17 deletions(-)

diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index dba0b36..afd18db 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -516,23 +516,6 @@ static int __kgdb_notify(struct die_args *args,
unsigned long cmd)
struct pt_regs *regs = args->regs;

switch (cmd) {
- case DIE_NMI:
- if (atomic_read(&kgdb_active) != -1) {
- /* KGDB CPU roundup */
- kgdb_nmicallback(raw_smp_processor_id(), regs);
- was_in_debug_nmi[raw_smp_processor_id()] = 1;
- touch_nmi_watchdog();
- return NOTIFY_STOP;
- }
- return NOTIFY_DONE;
-
- case DIE_NMIUNKNOWN:
- if (was_in_debug_nmi[raw_smp_processor_id()]) {
- was_in_debug_nmi[raw_smp_processor_id()] = 0;
- return NOTIFY_STOP;
- }
- return NOTIFY_DONE;
-
case DIE_DEBUG:
if (atomic_read(&kgdb_cpu_doing_single_step) != -1) {
if (user_mode(regs))
@@ -588,6 +571,52 @@ kgdb_notify(struct notifier_block *self, unsigned
long cmd, void *ptr)
return ret;
}

+static int
+kgdb_nmi_notify(struct notifier_block *self, unsigned long cmd, void *ptr)
+{
+ unsigned long flags;
+ struct pt_regs *regs = ((struct die_args*)ptr)->regs;
+ int ret = NOTIFY_DONE;
+
+ local_irq_save(flags);
+
+ switch (cmd) {
+ case DIE_NMI:
+ if (atomic_read(&kgdb_active) != -1) {
+ /* KGDB CPU roundup */
+ kgdb_nmicallback(raw_smp_processor_id(), regs);
+ was_in_debug_nmi[raw_smp_processor_id()] = 1;
+ touch_nmi_watchdog();
+ ret = NOTIFY_STOP;
+ }
+ break;
+
+ case DIE_NMIUNKNOWN:
+ if (was_in_debug_nmi[raw_smp_processor_id()]) {
+ was_in_debug_nmi[raw_smp_processor_id()] = 0;
+ ret = NOTIFY_STOP;
+ }
+ break;
+ default:
+ break;
+ }
+
+ local_irq_restore(flags);
+ return ret;
+}
+
+static struct notifier_block kgdb_nmi_notifier = {
+ .notifier_call = kgdb_nmi_notify,
+
+ /*
+ * We might need to send an IPI and
+ * do cpu roundup before anything else
+ * in notifier chain so high priority
+ * is needed.
+ */
+ .priority = NMI_LOCAL_HIGH_PRIOR,
+};
+
static struct notifier_block kgdb_notifier = {
.notifier_call = kgdb_notify,

@@ -605,6 +634,9 @@ static struct notifier_block kgdb_notifier = {
*/
int kgdb_arch_init(void)
{
+ int err = register_die_notifier(&kgdb_nmi_notifier);
+ if (err)
+ return err;
return register_die_notifier(&kgdb_notifier);
}

@@ -673,6 +705,7 @@ void kgdb_arch_exit(void)
breakinfo[i].pev = NULL;
}
}
+ unregister_die_notifier(&kgdb_nmi_notifier);
unregister_die_notifier(&kgdb_notifier);
}

--
1.6.0.4
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/