Hi,
Attached is a patch with a few i386 MCA updates. Summary of changes:
- Logging of corrected (non critical) MCA errors on P4.
- Don't clear the MCA status info in case of a non-recoverable error. If the OS
has failed to log the error, the BIOS can still have a look at that info.
- Minor bug fix in do_mce_timer(): check the current CPU's registers too, in
addition to calling smp_call_function(), which only runs on the other CPUs.
Thanks,
-Venkatesh
--- linux-2.5.45/arch/i386/kernel/bluesmoke.c.org Tue Oct 29 14:16:48 2002
+++ linux-2.5.45/arch/i386/kernel/bluesmoke.c Tue Oct 29 14:48:50 2002
@@ -209,10 +209,6 @@
printk(" at %08x%08x", ahigh, alow);
}
printk("\n");
- /* Clear it */
- wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
- /* Serialize */
- wmb();
}
}
@@ -220,7 +216,24 @@
panic("CPU context corrupt");
if(recover&1)
panic("Unable to continue");
+
printk(KERN_EMERG "Attempting to continue.\n");
+ /*
+ * Do not clear the MSR_IA32_MCi_STATUS if the error is not
+ * recoverable/continuable.This will allow BIOS to look at the MSRs
+ * for errors if the OS could not log the error.
+ */
+ for (i=0;i<banks;i++) {
+ unsigned int msr;
+ msr = MSR_IA32_MC0_STATUS+i*4;
+ rdmsr(msr,low, high);
+ if(high&(1<<31)) {
+ /* Clear it */
+ wrmsr(msr, 0UL, 0UL);
+ /* Serialize */
+ wmb();
+ }
+ }
mcgstl&=~(1<<2);
wrmsr(MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
}
@@ -300,6 +313,7 @@
static void do_mce_timer(void *data)
{
+ mce_checkregs(NULL);
smp_call_function (mce_checkregs, NULL, 1, 1);
}
@@ -471,6 +485,20 @@
case X86_VENDOR_INTEL:
intel_mcheck_init(c);
+#ifdef CONFIG_X86_MCE_NONFATAL
+#define CPU_PENTIUM4 15
+ if (c->x86 == CPU_PENTIUM4 && timerset == 0) {
+ /* Set the timer to check for non-fatal
+ errors every MCE_RATE seconds */
+ init_timer (&mce_timer);
+ mce_timer.expires = jiffies + MCE_RATE;
+ mce_timer.data = 0;
+ mce_timer.function = &mce_timerfunc;
+ add_timer (&mce_timer);
+ timerset = 1;
+ printk(KERN_INFO "Machine check exception
polling timer started.\n");
+ }
+#endif
break;
case X86_VENDOR_CENTAUR:
--- linux-2.5.45/arch/i386/Kconfig.org Thu Oct 31 14:25:19 2002
+++ linux-2.5.45/arch/i386/Kconfig Thu Oct 31 14:25:48 2002
@@ -439,7 +439,7 @@
the 386 and 486, so nearly everyone can say Y here.
config X86_MCE_NONFATAL
- bool "Check for non-fatal errors on Athlon/Duron"
+ bool "Check for non-fatal errors on Athlon/Duron/Pentium-4"
depends on X86_MCE
help
Enabling this feature starts a timer that triggers every 5 seconds
which
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
This archive was generated by hypermail 2b29 : Thu Nov 07 2002 - 22:00:21 EST