Re: wait_on_irq, CPU1

Manfred Spraul (manfreds@colorfullife.com)
Wed, 29 Dec 1999 00:15:35 +0100


This is a multi-part message in MIME format.
--------------83C536FA66691FC63184323E
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit

Oleg Drokin wrote:
>
> > wait_on_irq() doesn't print enough information to locate the problem. How
> > often does the server hang? I have a patch which would extend the debugging
> > information if the problem happens again.
> How much overhead it places in case of normal operations?
> If not much, I am willing to apply it.
>

My patch is attached.
It only affects the oops & show() functions, i.e. it should have
zero impact except during the actual hang. It prints a complete stack
dump including modules for all CPUs - as long as the secondary CPU
doesn't spin with disabled interrupts.

--
	Manfred

Btw, I've just received the announcement for 2.2.14pre17: it contains the Moxa serial driver.

--------------83C536FA66691FC63184323E
Content-Type: text/plain; charset=us-ascii; name="patch-oleg"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename="patch-oleg"

diff -ur 2.2/arch/i386/kernel/irq.c build-2.2/arch/i386/kernel/irq.c --- 2.2/arch/i386/kernel/irq.c Wed Oct 20 16:59:36 1999 +++ build-2.2/arch/i386/kernel/irq.c Wed Dec 29 00:04:18 1999 @@ -466,26 +466,33 @@ local_flush_tlb(); } } +extern void show_stack(unsigned long* stack); + +static void show_ipi(void* info) +{ + int cpu = smp_processor_id(); + + printk(KERN_EMERG "CPU %d: [irq %d bh %d]\n", + cpu, local_irq_count[cpu], local_bh_count[cpu]); + printk(KERN_EMERG "Stack dump:\n"); + show_stack(NULL); + +} static void show(char * str) { - int i; - unsigned long *stack; int cpu = smp_processor_id(); - extern char *get_options(char *str, int *ints); printk("\n%s, CPU %d:\n", str, cpu); printk("irq: %d [%d %d]\n", atomic_read(&global_irq_count), local_irq_count[0], local_irq_count[1]); printk("bh: %d [%d %d]\n", atomic_read(&global_bh_count), local_bh_count[0], local_bh_count[1]); - stack = (unsigned long *) &stack; - for (i = 40; i ; i--) { - unsigned long x = *++stack; - if (x > (unsigned long) &get_options && x < (unsigned long) &vsprintf) { - printk("<[%08lx]> ", x); - } - } + printk("Stack dump:\n"); + show_stack(NULL); + + smp_call_function(show_ipi, NULL, 0, 1); + printk("%s: spinning again.\n", str); } #define MAXCOUNT 100000000 diff -ur 2.2/arch/i386/kernel/traps.c build-2.2/arch/i386/kernel/traps.c --- 2.2/arch/i386/kernel/traps.c Wed May 19 14:15:07 1999 +++ build-2.2/arch/i386/kernel/traps.c Wed Dec 29 00:04:52 1999 @@ -114,19 +114,58 @@ /* * These constants are for searching for possible module text - * segments. VMALLOC_OFFSET comes from mm/vmalloc.c; MODULE_RANGE is - * a guess of how much space is likely to be vmalloced. + * segments. MODULE_RANGE is a guess of how much space is likely + * to be vmalloced. 
*/ -#define VMALLOC_OFFSET (8*1024*1024) #define MODULE_RANGE (8*1024*1024) +void show_stack(unsigned long * esp) +{ + unsigned long *stack, addr, module_start, module_end; + int i; + + if(esp == NULL) + esp = (unsigned long*)&esp; + stack = esp; + for(i=0; i < kstack_depth_to_print; i++) { + if (((long) stack & (2*PAGE_SIZE-1)) == 0) + break; + if (i && ((i % 8) == 0)) + printk("\n "); + printk("%08lx ", *stack++); + } + printk("\nCall Trace: "); + stack = esp; + i = 1; + module_start = VMALLOC_START; + module_end = module_start + MODULE_RANGE; + while (((long) stack & (2*PAGE_SIZE-1)) != 0) { + addr = *stack++; + /* + * If the address is either in the text segment of the + * kernel, or in the region which contains vmalloc'ed + * memory, it *may* be the address of a calling + * routine; if so, print it so that someone tracing + * down the cause of the crash will be able to figure + * out the call path that was taken. + */ + if (((addr >= (unsigned long) &_stext) && + (addr <= (unsigned long) &_etext)) || + ((addr >= module_start) && (addr <= module_end))) { + if (i && ((i % 8) == 0)) + printk("\n "); + printk("[<%08lx>] ", addr); + i++; + } + } +} + static void show_registers(struct pt_regs *regs) { int i; int in_kernel = 1; unsigned long esp; unsigned short ss; - unsigned long *stack, addr, module_start, module_end; esp = (unsigned long) (1+regs); ss = __KERNEL_DS; @@ -152,43 +191,38 @@ * time of the fault.. 
*/ if (in_kernel) { + pgd_t * pgdir; + pmd_t * pgmiddle; + pte_t * pgtable; + printk("\nStack: "); - stack = (unsigned long *) esp; - for(i=0; i < kstack_depth_to_print; i++) { - if (((long) stack & 4095) == 0) - break; - if (i && ((i % 8) == 0)) - printk("\n "); - printk("%08lx ", *stack++); - } - printk("\nCall Trace: "); - stack = (unsigned long *) esp; - i = 1; - module_start = PAGE_OFFSET + (max_mapnr << PAGE_SHIFT); - module_start = ((module_start + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)); - module_end = module_start + MODULE_RANGE; - while (((long) stack & 4095) != 0) { - addr = *stack++; - /* - * If the address is either in the text segment of the - * kernel, or in the region which contains vmalloc'ed - * memory, it *may* be the address of a calling - * routine; if so, print it so that someone tracing - * down the cause of the crash will be able to figure - * out the call path that was taken. - */ - if (((addr >= (unsigned long) &_stext) && - (addr <= (unsigned long) &_etext)) || - ((addr >= module_start) && (addr <= module_end))) { - if (i && ((i % 8) == 0)) - printk("\n "); - printk("[<%08lx>] ", addr); - i++; - } - } + if(esp >= PAGE_OFFSET && esp < high_memory) + show_stack((unsigned long*)esp); + else + printk("Bad stack pointer."); + printk("\nCode: "); - for(i=0;i<20;i++) - printk("%02x ", ((unsigned char *)regs->eip)[i]); + if(regs->eip < PAGE_OFFSET) + goto bad; + + pgdir = pgd_offset(current->mm,regs->eip); + if(pgd_none(*pgdir) || pgd_bad(*pgdir)) + goto bad; + + pgmiddle = pmd_offset(pgdir,regs->eip); + if(pmd_none(*pgmiddle) || pmd_bad(*pgmiddle)) + goto bad; + + pgtable = pte_offset(pgmiddle,regs->eip); + if(!pte_present(*pgtable)) + { +bad: + printk(" Bad EIP pointer."); + } else + { + for(i=0;i<20;i++) + printk("%02x ", ((unsigned char *)regs->eip)[i]); + } } printk("\n"); }

--------------83C536FA66691FC63184323E--

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/