Jeff Lessem wrote:
>
> I have done a bit more work on the problem I reported in my message
> "Crashes reading and writing to disk". To recap, on a machine with
> 8GB of RAM, either
>
> dd if=/dev/zero bs=1G count=10 | split -b 1073741824
>
> or
>
> find /bigfulldisk -type f -exec cat {} \; > /dev/null
>
> can reliably cause a crash.
It seems that one of your CPUs is stuck in an interrupt
routine. Could you please try running with the patch
below? Feed the resulting output through ksymoops.
Also (but separately) try enabling the NMI watchdog with
the `nmi_watchdog=1' kernel boot parameter.
This one will be hard to hunt down.
--- linux-2.4.7-pre6/arch/i386/kernel/irq.c Wed Jul 4 18:21:24 2001
+++ lk-ext3/arch/i386/kernel/irq.c Tue Jul 17 11:03:54 2001
@@ -280,7 +280,7 @@ static inline void wait_on_irq(int cpu)
for (;;) {
if (!--count) {
- show("wait_on_irq");
+ show_trace_smp();
count = ~0;
}
__sti();
--- linux-2.4.7-pre6/arch/i386/kernel/traps.c Wed Jul 4 18:21:24 2001
+++ lk-ext3/arch/i386/kernel/traps.c Tue Jul 17 11:04:58 2001
@@ -101,7 +101,7 @@ void show_trace(unsigned long * stack)
if (!stack)
stack = (unsigned long*)&stack;
- printk("Call Trace: ");
+ printk(KERN_DEBUG "Call Trace: ");
i = 1;
module_start = VMALLOC_START;
module_end = VMALLOC_END;
@@ -119,8 +119,10 @@ void show_trace(unsigned long * stack)
if (((addr >= (unsigned long) &_stext) &&
(addr <= (unsigned long) &_etext)) ||
((addr >= module_start) && (addr <= module_end))) {
- if (i && ((i % 8) == 0))
- printk("\n ");
+ if (i && ((i % 8) == 0)) {
+ printk("\n");
+ printk(KERN_DEBUG " ");
+ }
printk("[<%08lx>] ", addr);
i++;
}
@@ -153,13 +155,50 @@ void show_stack(unsigned long * esp)
for(i=0; i < kstack_depth_to_print; i++) {
if (((long) stack & (THREAD_SIZE-1)) == 0)
break;
- if (i && ((i % 8) == 0))
- printk("\n ");
+ if (i && ((i % 8) == 0)) {
+ printk("\n");
+ printk(KERN_DEBUG " ");
+ }
printk("%08lx ", *stack++);
}
printk("\n");
show_trace(esp);
}
+
+static void show_trace_local(void)
+{
+ printk(KERN_DEBUG "CPU %d:\n", smp_processor_id());
+ show_trace(0);
+}
+
+#ifdef CONFIG_SMP
+static atomic_t trace_cpu;
+
+static void show_trace_one(void *dummy)
+{
+ while (atomic_read(&trace_cpu) != smp_processor_id())
+ ;
+ show_trace_local();
+ atomic_inc(&trace_cpu);
+ while (atomic_read(&trace_cpu) != smp_num_cpus)
+ ;
+}
+
+void show_trace_smp(void)
+{
+ atomic_set(&trace_cpu, 0);
+ smp_call_function(show_trace_one, 0, 1, 0);
+ show_trace_one(0);
+}
+
+#else
+
+void show_trace_smp(void)
+{
+ show_trace_local();
+}
+
+#endif
static void show_registers(struct pt_regs *regs)
{
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
This archive was generated by hypermail 2b29 : Mon Jul 23 2001 - 21:00:08 EST