Re: WARNING: CPU: 4 PID: 863 at include/drm/drm_crtc.h:1577 drm_helper_choose_encoder_dpms+0x88/0x90() - evildoer found and neutralized

From: Borislav Petkov
Date: Sat Sep 26 2015 - 12:47:10 EST


On Wed, Sep 23, 2015 at 06:18:39PM +0200, Borislav Petkov wrote:
> On Wed, Sep 23, 2015 at 06:06:21PM +0200, Borislav Petkov wrote:
> > On Wed, Sep 23, 2015 at 04:44:50PM +0200, Daniel Vetter wrote:
> > > sorry I sprinkled the locking stuff in the wrong places. Still confused
> > > why the resume side doesn't blow up anywhere
> >
> > But it does:
> >
> > [ 69.394204] BUG: unable to handle kernel NULL pointer dereference at 0000000000000034
> > [ 69.402080] IP: [<ffffffff81321296>] pci_restore_msi_state+0x196/0x240
> > [ 69.408624] PGD 4162b8067 PUD 416581067 PMD 0
> > [ 69.413122] Oops: 0000 [#1] PREEMPT SMP
> > [ 69.417101] Modules linked in: tun sha256_ssse3 sha256_generic drbg binfmt_misc ipv6 vfat fat fuse dm_crypt dm_mod kv
> > m_amd kvm crc32_pclmul aesni_intel aes_x86_64 lrw gf128mul glue_helper ablk_helper cryptd amd64_edac_mod edac_mce_amd fa
> > m15h_power k10temp amdkfd amd_iommu_v2 radeon acpi_cpufreq
> > [ 69.443647] CPU: 4 PID: 814 Comm: kworker/u16:5 Not tainted 4.3.0-rc2+ #3
> > [ 69.450430] Hardware name: To be filled by O.E.M. To be filled by O.E.M./M5A97 EVO R2.0, BIOS 1503 01/16/2013
> > [ 69.460336] Workqueue: events_unbound async_run_entry_fn
> > [ 69.465667] task: ffff88042a255f00 ti: ffff880428a68000 task.ti: ffff880428a68000
> > [ 69.473145] RIP: 0010:[<ffffffff81321296>] [<ffffffff81321296>] pci_restore_msi_state+0x196/0x240
> > [ 69.482131] RSP: 0018:ffff880428a6bc28 EFLAGS: 00010286
> > [ 69.487436] RAX: 0000000000000000 RBX: ffff88042a308000 RCX: 0000000000000000
> > [ 69.494568] RDX: 0000000000000001 RSI: ffffffff81304448 RDI: ffffffff816c7a1b
> > [ 69.501700] RBP: ffff880428a6bc40 R08: 0000000000000001 R09: 0000000000522000
> > [ 69.508833] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
> > [ 69.515965] R13: ffff88042a3087b0 R14: ffff88042a308010 R15: ffff88042a308038
> > [ 69.523097] FS: 00007fc91328a700(0000) GS:ffff88042ce00000(0000) knlGS:0000000000000000
> > [ 69.531185] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
> > [ 69.536931] CR2: 0000000000000034 CR3: 00000004164c7000 CR4: 00000000000406e0
> > [ 69.544061] Stack:
> > [ 69.546073] 0080002c2a3087b0 0000000000000000 ffff88042a308000 ffff880428a6bc78
> > [ 69.553525] ffffffff8130c141 ffff88042a308098 ffff88042a308000 0000000000000000
> > [ 69.560996] ffff8804284e77a8 ffffffff81961ef1 ffff880428a6bc88 ffffffff8130c2b8
> > [ 69.568450] Call Trace:
> > [ 69.571044] [<ffffffff8130c141>] pci_restore_state.part.18+0xf1/0x250
> > [ 69.577706] [<ffffffff8130c2b8>] pci_restore_state+0x18/0x20
> > [ 69.583591] [<ffffffff8130f7fc>] pci_pm_restore_noirq+0x4c/0xd0
> > [ 69.589734] [<ffffffff8130f7b0>] ? pci_pm_freeze_noirq+0xf0/0xf0
> > [ 69.595966] [<ffffffff8146e847>] dpm_run_callback+0x77/0x2a0
> > [ 69.601850] [<ffffffff8146eb03>] device_resume_noirq+0x93/0x150
> > [ 69.607994] [<ffffffff8146ebdd>] async_resume_noirq+0x1d/0x50
> > [ 69.613967] [<ffffffff81078a06>] async_run_entry_fn+0x46/0xf0
> > [ 69.619939] [<ffffffff8106f548>] process_one_work+0x1f8/0x640
> > [ 69.625910] [<ffffffff8106f4a4>] ? process_one_work+0x154/0x640
> > [ 69.632054] [<ffffffff8106f9db>] worker_thread+0x4b/0x440
> > [ 69.637677] [<ffffffff8106f990>] ? process_one_work+0x640/0x640
> > [ 69.643822] [<ffffffff81075e86>] kthread+0xf6/0x110
> > [ 69.648927] [<ffffffff81075d90>] ? kthread_create_on_node+0x1f0/0x1f0
> > [ 69.655591] [<ffffffff816c893f>] ret_from_fork+0x3f/0x70
> > [ 69.661128] [<ffffffff81075d90>] ? kthread_create_on_node+0x1f0/0x1f0
> > [ 69.667794] Code: 66 89 4d ee 0f b7 c9 e8 79 41 fe ff 48 89 df e8 d1 7a ce ff 0f b6 53 4b 8b 73 38 48 8d 4d ee 48 8b 7b 10 83 c2 02 e8 1a 31 fe ff <41> 0f b6 4c 24 34 41 8b 54 24 30 be ff ff ff ff c0 e9 04 83 e1
> > [ 69.687986] RIP [<ffffffff81321296>] pci_restore_msi_state+0x196/0x240
> > [ 69.694772] RSP <ffff880428a6bc28>
> > [ 69.698412] CR2: 0000000000000034
> > [ 69.701879] ---[ end trace 814dd8cc56e427ae ]---
>
> Ok, after some quick staring, we're at __pci_restore_msi_state():
>
> pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
> msi_mask_irq(entry, msi_mask(entry->msi_attrib.multi_cap),
> entry->masked);
>
> which is:
>
> .loc 1 411 0
> movq %rbx, %rdi # dev,
> call arch_restore_msi_irqs #
> .LBB1921:
> .LBB1922:
> .loc 2 902 0
> movzbl 75(%rbx), %edx # dev_2(D)->msi_cap, D.31945
> movl 56(%rbx), %esi # MEM[(const struct pci_dev *)dev_2(D)].devfn, MEM[(const struct pci_dev *)dev_2(D)].devfn
> leaq -18(%rbp), %rcx #, tmp266
> movq 16(%rbx), %rdi # MEM[(const struct pci_dev *)dev_2(D)].bus, MEM[(const struct pci_dev *)dev_2(D)].bus
> addl $2, %edx #, D.31945
> call pci_bus_read_config_word #
> .LBE1922:
> .LBE1921:
> .loc 1 414 0
> movzbl 52(%r12), %ecx # *_85, tmp208 <--- faulting insn
> movl 48(%r12), %edx # _85->D.27233.D.27231.masked, D.31946
> .LBB1923:
> .LBB1924:
> .loc 1 176 0
> movl $-1, %esi #, D.31951
>
> and that %r12 is supposed to contain struct msi_desc *entry in
> __pci_restore_msi_state():
>
> entry = irq_get_msi_desc(dev->irq);
>
> which is
>
> .loc 4 654 0
> movl 1340(%rdi), %edi # dev_2(D)->irq, dev_2(D)->irq
> call irq_get_irq_data #
> .loc 4 655 0
> testq %rax, %rax # d
> je .L405 #,
> movq 16(%rax), %rax # d_62->common, d_62->common
> movq 16(%rax), %r12 # _63->msi_desc, D.31954
>
> but as we see above %r12 is 0.
>
> For some reason that entry thing in __pci_restore_msi_state() is not
> checked for NULL even though irq_get_msi_desc() can return NULL.

Ok, I bisected it.

First of all, Daniel, you didn't see the resume side blow up because
of the NULL ptr deref f*cking up the box much earlier. Once I reverted
the bad commit by hand (it wouldn't revert cleanly) the resume splats
showed.

And in talking about the bad commit, it is this one:

991de2e59090e55c65a7f59a049142e3c480f7bd is the first bad commit
commit 991de2e59090e55c65a7f59a049142e3c480f7bd
Author: Jiang Liu <jiang.liu@xxxxxxxxxxxxxxx>
Date: Wed Jun 10 16:54:59 2015 +0800

PCI, x86: Implement pcibios_alloc_irq() and pcibios_free_irq()

To support IOAPIC hotplug, we need to allocate PCI IRQ resources on demand
and free them when not used anymore.

Implement pcibios_alloc_irq() and pcibios_free_irq() to dynamically
allocate and free PCI IRQs.

Remove mp_should_keep_irq(), which is no longer used.

[bhelgaas: changelog]
Signed-off-by: Jiang Liu <jiang.liu@xxxxxxxxxxxxxxx>
Signed-off-by: Bjorn Helgaas <bhelgaas@xxxxxxxxxx>
Acked-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>

:040000 040000 765e2d5232d53247ec260b34b51589c3bccb36ae f680234a27685e94b1a35ae2a7218f8eafa9071a M arch
:040000 040000 d55a682bcde72682e883365e88ad1df6186fd54d f82c470a04a6845fcf5e0aa934512c75628f798d M drivers

Jiang, you have to stop breaking my box with your changes. This is
maybe the third time I'm bisecting fallout from your patches. If you're
touching all x86, you need to test on an AMD box too. Like everyone else
testing on the hardware their changes affect. It is that simple.

Anyway, reverting that commit by hand fixes my resume splat.

Here's the partial revert I did by hand:

---
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index fa1195dae425..164e3f8d3c3d 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -93,6 +93,8 @@ extern raw_spinlock_t pci_config_lock;
extern int (*pcibios_enable_irq)(struct pci_dev *dev);
extern void (*pcibios_disable_irq)(struct pci_dev *dev);

+extern bool mp_should_keep_irq(struct device *dev);
+
struct pci_raw_ops {
int (*read)(unsigned int domain, unsigned int bus, unsigned int devfn,
int reg, int len, u32 *val);
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 09d3afc0a181..3bff24438b00 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -672,20 +672,22 @@ int pcibios_add_device(struct pci_dev *dev)
return 0;
}

-int pcibios_alloc_irq(struct pci_dev *dev)
+int pcibios_enable_device(struct pci_dev *dev, int mask)
{
- return pcibios_enable_irq(dev);
-}
+ int err;

-void pcibios_free_irq(struct pci_dev *dev)
-{
- if (pcibios_disable_irq)
- pcibios_disable_irq(dev);
+ if ((err = pci_enable_resources(dev, mask)) < 0)
+ return err;
+
+ if (!pci_dev_msi_enabled(dev))
+ return pcibios_enable_irq(dev);
+ return 0;
}

-int pcibios_enable_device(struct pci_dev *dev, int mask)
+void pcibios_disable_device (struct pci_dev *dev)
{
- return pci_enable_resources(dev, mask);
+ if (!pci_dev_msi_enabled(dev) && pcibios_disable_irq)
+ pcibios_disable_irq(dev);
}

int pci_ext_cfg_avail(void)
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index 32e70343e6fd..f229834b36d4 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -1186,6 +1186,18 @@ void pcibios_penalize_isa_irq(int irq, int active)
pirq_penalize_isa_irq(irq, active);
}

+bool mp_should_keep_irq(struct device *dev)
+{
+ if (dev->power.is_prepared)
+ return true;
+#ifdef CONFIG_PM
+ if (dev->power.runtime_status == RPM_SUSPENDING)
+ return true;
+#endif
+
+ return false;
+}
+
static int pirq_enable_irq(struct pci_dev *dev)
{
u8 pin = 0;
@@ -1258,7 +1270,8 @@ static int pirq_enable_irq(struct pci_dev *dev)

static void pirq_disable_irq(struct pci_dev *dev)
{
- if (io_apic_assign_pci_irqs && pci_has_managed_irq(dev)) {
+ if (io_apic_assign_pci_irqs && !mp_should_keep_irq(&dev->dev) &&
+ dev->irq_managed && dev->irq) {
mp_unmap_irq(dev->irq);
pci_reset_managed_irq(dev);
}
diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
index 6da0f9beab19..d8a3f49a960c 100644
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@@ -479,6 +479,14 @@ void acpi_pci_irq_disable(struct pci_dev *dev)
if (!pin || !pci_has_managed_irq(dev))
return;

+ /* Keep IOAPIC pin configuration when suspending */
+ if (dev->dev.power.is_prepared)
+ return;
+#ifdef CONFIG_PM
+ if (dev->dev.power.runtime_status == RPM_SUSPENDING)
+ return;
+#endif
+
entry = acpi_pci_irq_lookup(dev, pin);
if (!entry)
return;

--
Regards/Gruss,
Boris.

ECO tip #101: Trim your mails when you reply.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/