[Patch] SMP-IOAPIC NMI Software Watchdog driver, io-apic-patch-2.1.85-D

MOLNAR Ingo (mingo@chiara.csoma.elte.hu)
Mon, 9 Feb 1998 23:44:53 +0100 (CET)


'look ma, no more hard lockups in Linux'

this patch brings a vanilla 2.1.85 kernel up to the latest IOAPIC code.

Probably the most interesting change is the new software watchdog driver,
which uses the IO-APIC to send periodic broadcast NMI interrupts to all
CPUs in the system. The driver detects all cases when there is a soft/hard
lockup on a CPU, and generates an artificial oops:

LOCKUP on CPU1, forcing oops
Unable to handle kernel NULL pointer dereference at virtual address 00000000
current->tss.cr3 = 016ee000, %cr3 = 016ee000
*pde = 00000000
Oops: 0002
CPU: 1
EIP: 0010:[<c010b10c>]
EFLAGS: 00013082
[...]

as these broadcast interrupts are NMI interrupts, all lockups are detected,
no matter whether the CPU has its IRQs locked or not. The logic deciding whether
there is a lockup on a CPU is quite straightforward as well: the local APIC timer
IRQs have to be served periodically. When they are for some reason delayed
for more than 500 milliseconds, the NMI-Watchdog generates an oops.

the 'watchdog source IRQ' defaults to IRQ0, but to have this driver on
boards with broken IO-APICs, I've made it configurable, and the driver
detects all these buggy cases correctly, and turns off the feature if
necessary. IRQ1 is the next most natural choice. [when there is a lockup,
pound the keyboard for a while to generate enough IRQs]

comments, suggestions, reports welcome,

-- mingo

--- 2.1.85/linux/drivers/char/Config.in Fri Feb 6 02:28:17 1998
+++ linux/drivers/char/Config.in Sun Feb 15 11:01:29 1998
@@ -103,6 +103,12 @@
tristate ' Software Watchdog' CONFIG_SOFT_WATCHDOG
tristate ' Berkshire Products PC Watchdog' CONFIG_PCWATCHDOG
tristate ' Acquire SBC Watchdog Timer' CONFIG_ACQUIRE_WDT
+ if [ "$SMP" = "1" ]; then
+ bool ' SMP-IOAPIC NMI Software Watchdog' CONFIG_NMI_WATCHDOG
+ if [ "$CONFIG_NMI_WATCHDOG" = "y" ]; then
+ int ' watchdog source IRQ' CONFIG_NMI_WATCHDOG_IRQ 0
+ fi
+ fi
fi
bool 'Enhanced Real Time Clock Support' CONFIG_RTC
if [ "$CONFIG_ALPHA_BOOK1" = "y" ]; then
--- 2.1.85/linux/arch/i386/kernel/traps.c Sat Jan 3 09:44:18 1998
+++ linux/arch/i386/kernel/traps.c Sun Feb 15 11:24:22 1998
@@ -22,6 +22,7 @@
#include <linux/smp_lock.h>
#include <linux/init.h>
#include <linux/delay.h>
+#include <linux/kernel_stat.h>

#include <asm/system.h>
#include <asm/uaccess.h>
@@ -238,12 +239,15 @@
unlock_kernel();
}

+#ifndef CONFIG_NMI_WATCHDOG
static void mem_parity_error(unsigned char reason, struct pt_regs * regs)
{
printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
printk("You probably have a hardware problem with your RAM chips\n");
-}
+}
+#endif

+#ifndef CONFIG_NMI_WATCHDOG
static void io_check_error(unsigned char reason, struct pt_regs * regs)
{
unsigned long i;
@@ -259,14 +263,18 @@
reason &= ~8;
outb(reason, 0x61);
}
+#endif

+#ifndef CONFIG_NMI_WATCHDOG
static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
{
printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
printk("Dazed and confused, but trying to continue\n");
printk("Do you have a strange power saving mode enabled?\n");
}
+#endif

+#ifndef CONFIG_NMI_WATCHDOG
asmlinkage void do_nmi(struct pt_regs * regs, long error_code)
{
unsigned char reason = inb(0x61);
@@ -280,6 +288,59 @@
if (!(reason & 0xc0))
unknown_nmi_error(reason, regs);
}
+#else
+
+/*
+ * FIXME: we assume here that the NMI came from the IO-APIC. It's a quite safe
+ * assumption in most cases, but if anyone knows a way to distinguish between
+ * NMI reasons, please speak up ... [i doubt that the IO-APIC does IO port 0x61
+ * correctly]
+ */
+
+extern atomic_t apic_timer_irqs [NR_CPUS];
+extern spinlock_t console_lock;
+
+asmlinkage void do_nmi(struct pt_regs * regs, long error_code)
+{
+ /*
+ * the best way to detect whether a CPU has a 'hard lockup' problem
+ * is to check its local APIC timer IRQ counts. If they are not
+ * changing then that CPU has some problem.
+ *
+ * as these watchdog NMI IRQs are broadcasted to every CPU, here
+ * we only have to check the current processor.
+ *
+ * since NMIs don't listen to _any_ locks, we have to be extremely
+ * careful not to rely on unsafe variables. The printk might lock
+ * up though, so we have to break up console_lock first ...
+ * [when there will be more tty-related locks, break them up
+ * here too!]
+ */
+
+ static atomic_t last_irq_sums [NR_CPUS] = { ATOMIC_INIT(0), }; /* per-CPU timer IRQ count seen at the previous NMI */
+ static atomic_t alert_counter [NR_CPUS] = { ATOMIC_INIT(0), }; /* per-CPU count of consecutive NMIs without timer IRQ progress */
+
+ int sum, cpu = hard_smp_processor_id();
+
+ sum = atomic_read(apic_timer_irqs+cpu); /* this CPU's current local APIC timer IRQ count */
+
+ if (atomic_read(last_irq_sums+cpu) == sum) {
+ /*
+ * Ayiee, looks like this CPU is stuck ...
+ * wait a few IRQs (half a second) before doing the oops ...
+ */
+ atomic_inc(alert_counter+cpu);
+ if (atomic_read(alert_counter+cpu) == HZ/2) { /* fires exactly once, when the stall count reaches HZ/2 */
+ spin_unlock(&console_lock); /* forcibly drop console_lock so the printk below cannot block on it */
+ printk("NMI Watchdog detected LOCKUP on CPU%d, forcing oops\n", cpu);
+ *(int *)0=0; /* deliberate write through a NULL pointer to force the oops */
+ }
+ } else {
+ atomic_set(last_irq_sums+cpu,sum); /* count changed: remember the new value ... */
+ atomic_set(alert_counter+cpu,0); /* ... and reset the stall counter */
+ }
+}
+#endif

asmlinkage void do_debug(struct pt_regs * regs, long error_code)
{
--- 2.1.85/linux/arch/i386/kernel/irq.c Tue Feb 10 08:43:02 1998
+++ linux/arch/i386/kernel/irq.c Fri Feb 13 09:06:03 1998
@@ -719,6 +719,14 @@
irq_handles[irq]->enable(irq);
}

+void make_8259A_irq (unsigned int irq) /* switch 'irq' from IO-APIC handling to the 8259A handler type */
+{
+ io_apic_irqs &= ~(1<<irq); /* clear this IRQ's bit in the io_apic_irqs mask */
+ irq_handles[irq] = &i8259A_irq_type; /* install the 8259A handler type for this IRQ */
+ disable_irq(irq); /* disable, then re-enable the IRQ -- presumably so it is reprogrammed via the new handler type; TODO confirm */
+ enable_irq(irq);
+}
+
/*
* Careful! The 8259A is a fragile beast, it pretty
* much _has_ to be done exactly like this (mask it
--- 2.1.85/linux/arch/i386/kernel/smp.c Tue Feb 10 08:43:02 1998
+++ linux/arch/i386/kernel/smp.c Sun Feb 15 11:22:44 1998
@@ -148,6 +148,8 @@
int mp_bus_id_to_type [MAX_MP_BUSSES] = { -1, };
extern int mp_irq_entries;
extern struct mpc_config_intsrc mp_irqs [MAX_IRQ_SOURCES];
+int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { -1, };
+int mp_current_pci_id = 0;

/* #define SMP_DEBUG */

@@ -336,9 +338,13 @@
mp_bus_id_to_type[m->mpc_busid] =
MP_BUS_ISA;
else
- if (strncmp(m->mpc_bustype,"PCI",3) == 0)
+ if (strncmp(m->mpc_bustype,"PCI",3) == 0) {
mp_bus_id_to_type[m->mpc_busid] =
MP_BUS_PCI;
+ mp_bus_id_to_pci_bus[m->mpc_busid] =
+ mp_current_pci_id;
+ mp_current_pci_id++;
+ }
mpt+=sizeof(*m);
count+=sizeof(*m);
break;
@@ -1404,6 +1410,9 @@
* [ if a single-CPU system runs an SMP kernel then we call the local
* interrupt as well. Thus we cannot inline the local irq ... ]
*/
+#ifdef CONFIG_NMI_WATCHDOG
+atomic_t apic_timer_irqs [NR_CPUS] = { ATOMIC_INIT(0), };
+#endif
void smp_apic_timer_interrupt(struct pt_regs * regs)
{
/*
@@ -1412,7 +1421,17 @@
* want to be able to accept NMI tlb invalidates
* during this time.
*/
+
+#ifdef CONFIG_NMI_WATCHDOG
+ int cpu = hard_smp_processor_id();
+ /*
+ * the only thing that can lock an NMI is an unACK-ed APIC ...
+ */
+ atomic_inc(apic_timer_irqs+cpu);
+#endif
+
ack_APIC_irq ();
+

smp_local_timer_interrupt(regs);
}
--- 2.1.85/linux/arch/i386/kernel/irq.h Tue Feb 10 08:43:02 1998
+++ linux/arch/i386/kernel/irq.h Fri Feb 13 09:06:00 1998
@@ -18,6 +18,7 @@
void setup_IO_APIC (void);
void init_IO_APIC_traps(void);
int IO_APIC_get_PCI_irq_vector (int bus, int slot, int fn);
+void make_8259A_irq (unsigned int irq);

#ifdef __SMP__
extern unsigned int io_apic_irqs;
@@ -34,6 +35,7 @@
MP_BUS_PCI
};
extern int mp_bus_id_to_type [MAX_MP_BUSSES];
+extern int mp_bus_id_to_pci_bus [MAX_MP_BUSSES];
extern char ioapic_OEM_ID [16];
extern char ioapic_Product_ID [16];

--- 2.1.85/linux/arch/i386/kernel/io_apic.c Tue Feb 10 08:43:01 1998
+++ linux/arch/i386/kernel/io_apic.c Sun Feb 15 11:12:02 1998
@@ -35,6 +35,9 @@
* spontaneously, GCC should not cache it
*/
volatile unsigned int * io_apic_reg = NULL;
+#ifdef CONFIG_NMI_WATCHDOG
+int nmi_pin = -1;
+#endif

/*
* The structure of the IO-APIC:
@@ -62,6 +65,7 @@
__u32 vector : 8,
delivery_mode : 3, /* 000: FIXED
* 001: lowest prio
+ * 100: NMI
* 111: ExtInt
*/
dest_mode : 1, /* 0: physical, 1: logical */
@@ -191,10 +195,12 @@
{
int i;

- for (i=mp_irq_entries-1; i>=0; i--) {
- if (mp_irqs[i].mpc_dstirq == pin)
+ for (i=0; i<mp_irq_entries; i++)
+ if ( (mp_irqs[i].mpc_irqtype == 0x00) &&
+ (mp_irqs[i].mpc_dstirq == pin))
+
return i;
- }
+
return -1;
}

@@ -268,6 +274,21 @@

if (!IO_APIC_IRQ(irq))
continue;
+ if (mp_irqs[i].mpc_irqtype)
+ continue;
+
+#ifdef CONFIG_NMI_WATCHDOG
+ if (irq==CONFIG_NMI_WATCHDOG_IRQ) {
+ entry.delivery_mode = 4; /* broadcast NMI */
+ make_8259A_irq(irq);
+ /*
+ * Remember which register has the NMI IRQ entry,
+ * so we can turn it off in case there is some
+ * incompatibility
+ */
+ nmi_pin = i;
+ }
+#endif

entry.vector = IO_APIC_GATE_OFFSET + (irq<<3);

@@ -397,11 +418,11 @@
for (i=0; i<mp_irq_entries; i++) {
int lbus = mp_irqs[i].mpc_srcbus;

- if (IO_APIC_IRQ(i) &&
+ if (IO_APIC_IRQ(mp_irqs[i].mpc_dstirq) &&
(mp_bus_id_to_type[lbus] == MP_BUS_PCI) &&
!mp_irqs[i].mpc_irqtype &&
- (bus == mp_irqs[i].mpc_srcbus) &&
- (slot == (mp_irqs[i].mpc_srcbusirq >> 2)) &&
+ (bus == mp_bus_id_to_pci_bus[mp_irqs[i].mpc_srcbus]) &&
+ (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f)) &&
(pci_pin == (mp_irqs[i].mpc_srcbusirq & 3)))

return mp_irqs[i].mpc_dstirq;
@@ -409,6 +430,30 @@
return -1;
}

+/*
+ * There is a nasty bug in some older SMP boards, their mptable lies
+ * about the timer IRQ. We do the following to work around the situation:
+ *
+ * - timer IRQ defaults to IO-APIC IRQ
+ * - if this function detects that timer IRQs are defunct, then we fall
+ * back to ISA timer IRQs
+ */
+static int timer_irq_works (void) /* returns 1 if jiffies advanced by more than 1 across the delay below, else 0 */
+{
+ unsigned int t1=jiffies; /* jiffies snapshot taken before the delay */
+ unsigned long flags;
+
+ save_flags(flags); /* NOTE(review): flags are saved here but never restored in this function -- confirm intent */
+ sti(); /* enable interrupts so that timer ticks can be delivered during the delay */
+
+ udelay(100*1000); /* busy-wait 100*1000 microseconds */
+
+ if (jiffies-t1>1) /* more than one tick was counted: treat the timer IRQ as working */
+ return 1;
+
+ return 0;
+}
+
void print_IO_APIC (void)
{
int i;
@@ -579,7 +624,7 @@
pirqs_enabled)
{
printk("ENABLING IO-APIC IRQs\n");
- io_apic_irqs = ~((1<<0)|(1<<2)|(1<<13));
+ io_apic_irqs = ~((1<<2)|(1<<13));
} else {
if (ioapic_blacklisted())
printk(" blacklisted board, DISABLING IO-APIC IRQs\n");
@@ -592,6 +637,26 @@

init_IO_APIC_traps();
setup_IO_APIC_irqs ();
+
+#ifdef CONFIG_NMI_WATCHDOG
+ if (nmi_pin == -1)
+ printk(".. NMI watchdog has invalid source IRQ.\n");
+ else
+ printk("NMI Watchdog activated on source IRQ %d\n",
+ CONFIG_NMI_WATCHDOG_IRQ);
+#endif
+
+ if (!timer_irq_works ()) {
+ make_8259A_irq(0);
+ if (!timer_irq_works ())
+ panic("IO-APIC + timer doesnt work!");
+ printk("..MP-BIOS bug: i8254 timer not connected to IO-APIC\n");
+ printk("..falling back to 8259A-based timer interrupt\n");
+#ifdef CONFIG_NMI_WATCHDOG
+ if ((nmi_pin != -1) && (CONFIG_NMI_WATCHDOG_IRQ == 0))
+ printk(".. NMI Watchdog disabled as source IRQ is timer!\n");
+#endif
+ }

printk("nr of MP irq sources: %d.\n", mp_irq_entries);
printk("nr of IOAPIC registers: %d.\n", nr_ioapic_registers);

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu