Re: [BUG] soft lockup detected with ipcs

From: Jiri Olsa
Date: Mon Mar 03 2008 - 07:01:10 EST


Jiri Kosina wrote:
> On Sun, 2 Mar 2008, Jiri Kosina wrote:
>
>> Could you please try what is the output with the attached patch below, so
>> that we know which spinlock buggers?
>
> Uhm, and hereby I attach the patch.
>
> diff --git a/ipc/shm.c b/ipc/shm.c
> index c47e872..2abfb70 100644
> --- a/ipc/shm.c
> +++ b/ipc/shm.c
> @@ -615,6 +615,7 @@ static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
> *rss += (HPAGE_SIZE/PAGE_SIZE)*mapping->nrpages;
> } else {
> struct shmem_inode_info *info = SHMEM_I(inode);
> + printk(KERN_DEBUG "info: %p\n", info);
> spin_lock(&info->lock);
> *rss += inode->i_mapping->nrpages;
> *swp += info->swapped;
> diff --git a/ipc/util.c b/ipc/util.c
> index fd1b50d..5a0d4d2 100644
> --- a/ipc/util.c
> +++ b/ipc/util.c
> @@ -662,6 +662,7 @@ struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id)
>
> up_read(&ids->rw_mutex);
>
> + printk(KERN_DEBUG "out: %p\n", out);
> spin_lock(&out->lock);
>
> /* ipc_rmid() may have already freed the ID while ipc_lock

I'll try the patch, so far I attached the ipcs strace and adjacent kernel logs.
execve("/usr/bin/ipcs", ["ipcs"], [/* 37 vars */]) = 0
brk(0) = 0x804e000
access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY) = 3
fstat64(3, {st_mode=S_IFREG|0644, st_size=60196, ...}) = 0
mmap2(NULL, 60196, PROT_READ, MAP_PRIVATE, 3, 0) = 0xb7f2c000
close(3) = 0
open("/lib/libc.so.6", O_RDONLY) = 3
read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0@e\1\0004\0\0\0"..., 512) = 512
fstat64(3, {st_mode=S_IFREG|0755, st_size=1261852, ...}) = 0
mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7f2b000
mmap2(NULL, 1267248, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0xb7df5000
mmap2(0xb7f25000, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x130) = 0xb7f25000
mmap2(0xb7f28000, 9776, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0xb7f28000
close(3) = 0
mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7df4000
set_thread_area({entry_number:-1 -> 6, base_addr:0xb7df46c0, limit:1048575, seg_32bit:1, contents:0, read_exec_only:0, limit_in_pages:1, seg_not_present:0, useable:1}) = 0
mprotect(0xb7f25000, 8192, PROT_READ) = 0
mprotect(0x804c000, 4096, PROT_READ) = 0
mprotect(0xb7f56000, 4096, PROT_READ) = 0
munmap(0xb7f2c000, 60196) = 0
brk(0) = 0x804e000
brk(0x806f000) = 0x806f000
fstat64(1, {st_mode=S_IFCHR|0622, st_rdev=makedev(136, 0), ...}) = 0
mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7f3a000
write(1, "\n", 1) = 1
shmctl(0, IPC_64|SHM_INFO, 0xbfd9e9e0 <unfinished ...>
+++ killed by SIGSEGV +++
[ 85.868123] BUG: unable to handle kernel NULL pointer dereference at virtual address 000000bf
[ 85.868149] printing eip: c0133034 *pde = 00000000
[ 85.868171] Oops: 0002 [#1]
[ 85.868184] Modules linked in: netconsole i915 drm configfs snd_intel8x0 snd_ac97_codec ac97_bus snd_pcm snd_timer snd snd_page_alloc ipw2200 e1000
[ 85.868282]
[ 85.868290] Pid: 4973, comm: ipcs Not tainted (2.6.24.3test-dirty #19)
[ 85.868302] EIP: 0060:[<c0133034>] EFLAGS: 00010086 CPU: 0
[ 85.868315] EIP is at __lock_acquire+0x319/0xc20
[ 85.868323] EAX: 00000002 EBX: 00000246 ECX: def4e4d4 EDX: 00000002
[ 85.868331] ESI: ffffffff EDI: 00000000 EBP: d8fcfe7c ESP: d8fcfe20
[ 85.868339] DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 0068
[ 85.868347] Process ipcs (pid: 4973, ti=d8fce000 task=dd96d0d0 task.ti=d8fce000)
[ 85.868354] Stack: c01338f4 d8fcfe88 c01338f4 d8fcfe4c 00000347 00000000 00000002 00000000
[ 85.868421] 00000000 def4e4d4 00000000 dd96d0d0 00000001 dd96d728 d8fcfebc c01338f4
[ 85.868491] 00000000 dd96d728 00000347 00000000 00000246 00000000 00000000 d8fcfea4
[ 85.868553] Call Trace:
[ 85.868565] [<c0103e9f>] show_trace_log_lvl+0x1a/0x2f
[ 85.868582] [<c0103f51>] show_stack_log_lvl+0x9d/0xa5
[ 85.868606] [<c0104006>] show_registers+0xad/0x17c
[ 85.868620] [<c01041cc>] die+0xf7/0x1c8
[ 85.868633] [<c0110e31>] do_page_fault+0x464/0x54b
[ 85.868648] [<c036259a>] error_code+0x6a/0x70
[ 85.868667] [<c01339b3>] lock_acquire+0x78/0x91
[ 85.868680] [<c0361f65>] _spin_lock+0x2e/0x58
[ 85.868693] [<c01dc5a6>] sys_shmctl+0x6f8/0x776
[ 85.868713] [<c0106d33>] sys_ipc+0x19f/0x1b5
[ 85.868727] [<c0102fa6>] syscall_call+0x7/0xb
[ 85.868740] =======================
[ 85.868747] Code: 00 85 c0 0f 84 1d 09 00 00 83 3d 20 b4 78 c0 00 0f 85 10 09 00 00 c7 44 24 0c c8 ad 36 c0 c7 44 24 08 26 03 00 00 e9 8b 07 00 00 <ff> 86 c0 00 00 00 8b 45 d0 8b 80 54 06 00 00 83 f8 1d 89 45 cc
[ 85.869152] EIP: [<c0133034>] __lock_acquire+0x319/0xc20 SS:ESP 0068:d8fcfe20
[ 85.869179] ---[ end trace 807f13dee3166302 ]---
[ 111.659866] BUG: soft lockup - CPU#0 stuck for 11s! [ipcs:4979]
[ 111.659881]
[ 111.659889] Pid: 4979, comm: ipcs Tainted: G D (2.6.24.3test-dirty #19)
[ 111.659898] EIP: 0060:[<c01f2672>] EFLAGS: 00000246 CPU: 0
[ 111.659911] EIP is at delay_tsc+0xe/0x17
[ 111.659919] EAX: 98b6bb8f EBX: 00000001 ECX: 98b6bb5c EDX: 00000029
[ 111.659927] ESI: 093ca0bf EDI: 00000001 EBP: d8f63e70 ESP: d8f63e6c
[ 111.659935] DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 0068
[ 111.659943] CR0: 8005003b CR2: 08053c30 CR3: 1e86c000 CR4: 000006d0
[ 111.659951] DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
[ 111.659959] DR6: ffff0ff0 DR7: 00000400
[ 111.659967] [<c0103e9f>] show_trace_log_lvl+0x1a/0x2f
[ 111.659985] [<c0104784>] show_trace+0x12/0x14
[ 111.659998] [<c010163e>] show_regs+0x1c/0x1f
[ 111.660011] [<c013b9f8>] softlockup_tick+0xce/0xe7
[ 111.660026] [<c01202a7>] run_local_timers+0x12/0x14
[ 111.660042] [<c01204b6>] update_process_times+0x1b/0x3f
[ 111.660056] [<c012f3a3>] tick_periodic+0x65/0x71
[ 111.660072] [<c012f3c1>] tick_handle_periodic+0x12/0x61
[ 111.660088] [<c0105d27>] timer_interrupt+0x37/0x3e
[ 111.660103] [<c013bcae>] handle_IRQ_event+0x1a/0x46
[ 111.660118] [<c013d0e3>] handle_level_irq+0x6f/0xb8
[ 111.660132] [<c010517a>] do_IRQ+0x78/0x8f
[ 111.660145] [<c01039b6>] common_interrupt+0x2e/0x34
[ 111.660159] [<c01f2649>] __delay+0x9/0xb
[ 111.660173] [<c01ffe19>] _raw_spin_lock+0x7f/0xd8
[ 111.660190] [<c0361f81>] _spin_lock+0x4a/0x58
[ 111.660207] [<c01dc5a6>] sys_shmctl+0x6f8/0x776
[ 111.660223] [<c0106d33>] sys_ipc+0x19f/0x1b5
[ 111.660237] [<c0102fa6>] syscall_call+0x7/0xb
[ 111.660253] =======================
[ 114.335266] ipw2200: Firmware error detected. Restarting.
[ 114.814862] ipw2200: Firmware error detected. Restarting.
[ 123.462514] BUG: soft lockup - CPU#0 stuck for 11s! [ipcs:4979]
[ 123.462522]
[ 123.462529] Pid: 4979, comm: ipcs Tainted: G D (2.6.24.3test-dirty #19)
[ 123.462537] EIP: 0060:[<c01f266e>] EFLAGS: 00000246 CPU: 0
[ 123.462545] EIP is at delay_tsc+0xa/0x17
[ 123.462553] EAX: fe4d63d4 EBX: 00000001 ECX: fe4d63d4 EDX: 0000002d
[ 123.462561] ESI: 1316f741 EDI: 00000001 EBP: d8f63e70 ESP: d8f63e6c
[ 123.462569] DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 0068
[ 123.462576] CR0: 8005003b CR2: 08053c30 CR3: 1e86c000 CR4: 000006d0
[ 123.462584] DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
[ 123.462591] DR6: ffff0ff0 DR7: 00000400
[ 123.462598] [<c0103e9f>] show_trace_log_lvl+0x1a/0x2f
[ 123.462612] [<c0104784>] show_trace+0x12/0x14
[ 123.462626] [<c010163e>] show_regs+0x1c/0x1f
[ 123.462639] [<c013b9f8>] softlockup_tick+0xce/0xe7
[ 123.462653] [<c01202a7>] run_local_timers+0x12/0x14
[ 123.462667] [<c01204b6>] update_process_times+0x1b/0x3f
[ 123.462681] [<c012f3a3>] tick_periodic+0x65/0x71
[ 123.462696] [<c012f3c1>] tick_handle_periodic+0x12/0x61
[ 123.462710] [<c0105d27>] timer_interrupt+0x37/0x3e
[ 123.462724] [<c013bcae>] handle_IRQ_event+0x1a/0x46
[ 123.462738] [<c013d0e3>] handle_level_irq+0x6f/0xb8
[ 123.462751] [<c010517a>] do_IRQ+0x78/0x8f
[ 123.462765] [<c01039b6>] common_interrupt+0x2e/0x34
[ 123.462779] [<c01f2649>] __delay+0x9/0xb
[ 123.462792] [<c01ffe19>] _raw_spin_lock+0x7f/0xd8
[ 123.462806] [<c0361f81>] _spin_lock+0x4a/0x58
[ 123.462819] [<c01dc5a6>] sys_shmctl+0x6f8/0x776
[ 123.462833] [<c0106d33>] sys_ipc+0x19f/0x1b5
[ 123.462846] [<c0102fa6>] syscall_call+0x7/0xb
[ 123.462859] =======================
[ 147.063819] BUG: soft lockup - CPU#0 stuck for 11s! [ipcs:4979]
[ 147.063827]
[ 147.063834] Pid: 4979, comm: ipcs Tainted: G D (2.6.24.3test-dirty #19)
[ 147.063842] EIP: 0060:[<c01f266c>] EFLAGS: 00000246 CPU: 0
[ 147.063850] EIP is at delay_tsc+0x8/0x17
[ 147.063858] EAX: c9192061 EBX: 00000001 ECX: c9191fef EDX: 00000036
[ 147.063866] ESI: 26cb024e EDI: 00000001 EBP: d8f63e70 ESP: d8f63e6c
[ 147.063873] DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 0068
[ 147.063881] CR0: 8005003b CR2: 08053c30 CR3: 1e86c000 CR4: 000006d0
[ 147.063889] DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
[ 147.063896] DR6: ffff0ff0 DR7: 00000400
[ 147.063903] [<c0103e9f>] show_trace_log_lvl+0x1a/0x2f
[ 147.063917] [<c0104784>] show_trace+0x12/0x14
[ 147.063930] [<c010163e>] show_regs+0x1c/0x1f
[ 147.063944] [<c013b9f8>] softlockup_tick+0xce/0xe7
[ 147.063958] [<c01202a7>] run_local_timers+0x12/0x14
[ 147.063972] [<c01204b6>] update_process_times+0x1b/0x3f
[ 147.063986] [<c012f3a3>] tick_periodic+0x65/0x71
[ 147.064000] [<c012f3c1>] tick_handle_periodic+0x12/0x61
[ 147.064014] [<c0105d27>] timer_interrupt+0x37/0x3e
[ 147.064027] [<c013bcae>] handle_IRQ_event+0x1a/0x46
[ 147.064041] [<c013d0e3>] handle_level_irq+0x6f/0xb8
[ 147.064055] [<c010517a>] do_IRQ+0x78/0x8f
[ 147.064068] [<c01039b6>] common_interrupt+0x2e/0x34
[ 147.064081] [<c01f2649>] __delay+0x9/0xb
[ 147.064095] [<c01ffe19>] _raw_spin_lock+0x7f/0xd8
[ 147.064109] [<c0361f81>] _spin_lock+0x4a/0x58
[ 147.064122] [<c01dc5a6>] sys_shmctl+0x6f8/0x776
[ 147.064135] [<c0106d33>] sys_ipc+0x19f/0x1b5
[ 147.064148] [<c0102fa6>] syscall_call+0x7/0xb
[ 147.064162] =======================
[ 158.862473] BUG: soft lockup - CPU#0 stuck for 11s! [ipcs:4979]
[ 158.862481]
[ 158.862488] Pid: 4979, comm: ipcs Tainted: G D (2.6.24.3test-dirty #19)
[ 158.862496] EIP: 0060:[<c01f2674>] EFLAGS: 00000206 CPU: 0
[ 158.862504] EIP is at delay_tsc+0x10/0x17
[ 158.862511] EAX: 00000033 EBX: 00000001 ECX: 2e4e3416 EDX: 0000003b
[ 158.862519] ESI: 30a4aa86 EDI: 00000001 EBP: d8f63e70 ESP: d8f63e6c
[ 158.862527] DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 0068
[ 158.862535] CR0: 8005003b CR2: 08053c30 CR3: 1e86c000 CR4: 000006d0
[ 158.862542] DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
[ 158.862550] DR6: ffff0ff0 DR7: 00000400
[ 158.862556] [<c0103e9f>] show_trace_log_lvl+0x1a/0x2f
[ 158.862570] [<c0104784>] show_trace+0x12/0x14
[ 158.862584] [<c010163e>] show_regs+0x1c/0x1f
[ 158.862598] [<c013b9f8>] softlockup_tick+0xce/0xe7
[ 158.862611] [<c01202a7>] run_local_timers+0x12/0x14
[ 158.862625] [<c01204b6>] update_process_times+0x1b/0x3f
[ 158.862640] [<c012f3a3>] tick_periodic+0x65/0x71
[ 158.862654] [<c012f3c1>] tick_handle_periodic+0x12/0x61
[ 158.862667] [<c0105d27>] timer_interrupt+0x37/0x3e
[ 158.862681] [<c013bcae>] handle_IRQ_event+0x1a/0x46
[ 158.862694] [<c013d0e3>] handle_level_irq+0x6f/0xb8
[ 158.862708] [<c010517a>] do_IRQ+0x78/0x8f
[ 158.862722] [<c01039b6>] common_interrupt+0x2e/0x34
[ 158.862735] [<c01f2649>] __delay+0x9/0xb
[ 158.862749] [<c01ffe19>] _raw_spin_lock+0x7f/0xd8
[ 158.862763] [<c0361f81>] _spin_lock+0x4a/0x58
[ 158.862776] [<c01dc5a6>] sys_shmctl+0x6f8/0x776
[ 158.862789] [<c0106d33>] sys_ipc+0x19f/0x1b5
[ 158.862802] [<c0102fa6>] syscall_call+0x7/0xb
[ 158.862817] =======================