Re: WARNING: CPU: 4 PID: 863 at include/drm/drm_crtc.h:1577 drm_helper_choose_encoder_dpms+0x88/0x90()

From: Borislav Petkov
Date: Wed Sep 23 2015 - 12:18:46 EST


On Wed, Sep 23, 2015 at 06:06:21PM +0200, Borislav Petkov wrote:
> On Wed, Sep 23, 2015 at 04:44:50PM +0200, Daniel Vetter wrote:
> > sorry I sprinkled the locking stuff in the wrong places. Still confused
> > why the resume side doesn't blow up anywhere
>
> But it does:
>
> [ 69.394204] BUG: unable to handle kernel NULL pointer dereference at 0000000000000034
> [ 69.402080] IP: [<ffffffff81321296>] pci_restore_msi_state+0x196/0x240
> [ 69.408624] PGD 4162b8067 PUD 416581067 PMD 0
> [ 69.413122] Oops: 0000 [#1] PREEMPT SMP
> [ 69.417101] Modules linked in: tun sha256_ssse3 sha256_generic drbg binfmt_misc ipv6 vfat fat fuse dm_crypt dm_mod kv
> m_amd kvm crc32_pclmul aesni_intel aes_x86_64 lrw gf128mul glue_helper ablk_helper cryptd amd64_edac_mod edac_mce_amd fa
> m15h_power k10temp amdkfd amd_iommu_v2 radeon acpi_cpufreq
> [ 69.443647] CPU: 4 PID: 814 Comm: kworker/u16:5 Not tainted 4.3.0-rc2+ #3
> [ 69.450430] Hardware name: To be filled by O.E.M. To be filled by O.E.M./M5A97 EVO R2.0, BIOS 1503 01/16/2013
> [ 69.460336] Workqueue: events_unbound async_run_entry_fn
> [ 69.465667] task: ffff88042a255f00 ti: ffff880428a68000 task.ti: ffff880428a68000
> [ 69.473145] RIP: 0010:[<ffffffff81321296>] [<ffffffff81321296>] pci_restore_msi_state+0x196/0x240
> [ 69.482131] RSP: 0018:ffff880428a6bc28 EFLAGS: 00010286
> [ 69.487436] RAX: 0000000000000000 RBX: ffff88042a308000 RCX: 0000000000000000
> [ 69.494568] RDX: 0000000000000001 RSI: ffffffff81304448 RDI: ffffffff816c7a1b
> [ 69.501700] RBP: ffff880428a6bc40 R08: 0000000000000001 R09: 0000000000522000
> [ 69.508833] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
> [ 69.515965] R13: ffff88042a3087b0 R14: ffff88042a308010 R15: ffff88042a308038
> [ 69.523097] FS: 00007fc91328a700(0000) GS:ffff88042ce00000(0000) knlGS:0000000000000000
> [ 69.531185] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
> [ 69.536931] CR2: 0000000000000034 CR3: 00000004164c7000 CR4: 00000000000406e0
> [ 69.544061] Stack:
> [ 69.546073] 0080002c2a3087b0 0000000000000000 ffff88042a308000 ffff880428a6bc78
> [ 69.553525] ffffffff8130c141 ffff88042a308098 ffff88042a308000 0000000000000000
> [ 69.560996] ffff8804284e77a8 ffffffff81961ef1 ffff880428a6bc88 ffffffff8130c2b8
> [ 69.568450] Call Trace:
> [ 69.571044] [<ffffffff8130c141>] pci_restore_state.part.18+0xf1/0x250
> [ 69.577706] [<ffffffff8130c2b8>] pci_restore_state+0x18/0x20
> [ 69.583591] [<ffffffff8130f7fc>] pci_pm_restore_noirq+0x4c/0xd0
> [ 69.589734] [<ffffffff8130f7b0>] ? pci_pm_freeze_noirq+0xf0/0xf0
> [ 69.595966] [<ffffffff8146e847>] dpm_run_callback+0x77/0x2a0
> [ 69.601850] [<ffffffff8146eb03>] device_resume_noirq+0x93/0x150
> [ 69.607994] [<ffffffff8146ebdd>] async_resume_noirq+0x1d/0x50
> [ 69.613967] [<ffffffff81078a06>] async_run_entry_fn+0x46/0xf0
> [ 69.619939] [<ffffffff8106f548>] process_one_work+0x1f8/0x640
> [ 69.625910] [<ffffffff8106f4a4>] ? process_one_work+0x154/0x640
> [ 69.632054] [<ffffffff8106f9db>] worker_thread+0x4b/0x440
> [ 69.637677] [<ffffffff8106f990>] ? process_one_work+0x640/0x640
> [ 69.643822] [<ffffffff81075e86>] kthread+0xf6/0x110
> [ 69.648927] [<ffffffff81075d90>] ? kthread_create_on_node+0x1f0/0x1f0
> [ 69.655591] [<ffffffff816c893f>] ret_from_fork+0x3f/0x70
> [ 69.661128] [<ffffffff81075d90>] ? kthread_create_on_node+0x1f0/0x1f0
> [ 69.667794] Code: 66 89 4d ee 0f b7 c9 e8 79 41 fe ff 48 89 df e8 d1 7a ce ff 0f b6 53 4b 8b 73 38 48 8d 4d ee 48 8b 7b 10 83 c2 02 e8 1a 31 fe ff <41> 0f b6 4c 24 34 41 8b 54 24 30 be ff ff ff ff c0 e9 04 83 e1
> [ 69.687986] RIP [<ffffffff81321296>] pci_restore_msi_state+0x196/0x240
> [ 69.694772] RSP <ffff880428a6bc28>
> [ 69.698412] CR2: 0000000000000034
> [ 69.701879] ---[ end trace 814dd8cc56e427ae ]---

Ok, after some quick staring, we're at __pci_restore_msi_state():

pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
msi_mask_irq(entry, msi_mask(entry->msi_attrib.multi_cap),
entry->masked);

which is:

.loc 1 411 0
movq %rbx, %rdi # dev,
call arch_restore_msi_irqs #
.LBB1921:
.LBB1922:
.loc 2 902 0
movzbl 75(%rbx), %edx # dev_2(D)->msi_cap, D.31945
movl 56(%rbx), %esi # MEM[(const struct pci_dev *)dev_2(D)].devfn, MEM[(const struct pci_dev *)dev_2(D)].devfn
leaq -18(%rbp), %rcx #, tmp266
movq 16(%rbx), %rdi # MEM[(const struct pci_dev *)dev_2(D)].bus, MEM[(const struct pci_dev *)dev_2(D)].bus
addl $2, %edx #, D.31945
call pci_bus_read_config_word #
.LBE1922:
.LBE1921:
.loc 1 414 0
movzbl 52(%r12), %ecx # *_85, tmp208 <--- faulting insn
movl 48(%r12), %edx # _85->D.27233.D.27231.masked, D.31946
.LBB1923:
.LBB1924:
.loc 1 176 0
movl $-1, %esi #, D.31951

and that %r12 is supposed to contain struct msi_desc *entry in
__pci_restore_msi_state():

entry = irq_get_msi_desc(dev->irq);

which is

.loc 4 654 0
movl 1340(%rdi), %edi # dev_2(D)->irq, dev_2(D)->irq
call irq_get_irq_data #
.loc 4 655 0
testq %rax, %rax # d
je .L405 #,
movq 16(%rax), %rax # d_62->common, d_62->common
movq 16(%rax), %r12 # _63->msi_desc, D.31954

but as we see above %r12 is 0.

For some reason that entry thing in __pci_restore_msi_state() is not
checked for NULL even though irq_get_msi_desc() can return NULL.

Maybe tglx would have an idea...

Hrmmm.

--
Regards/Gruss,
Boris.

ECO tip #101: Trim your mails when you reply.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/