[PATCH] Debug: Gather more information about AMD iommu device

From: Jiang Liu
Date: Wed Sep 30 2015 - 02:49:29 EST


Hi Boris,
From the log file, we learned that the NULL pointer dereference
was caused by the AMD IOMMU device. For normal MSI-enabled PCI devices, we get
valid irq numbers such as:
[ 74.661170] ahci 0000:04:00.0: irqdomain: freeze msi 1 irq28
[ 74.661297] radeon 0000:01:00.0: irqdomain: freeze msi 1 irq47
But for the AMD IOMMU device, we got an invalid irq number (0) after
enabling MSI:
[ 74.662488] pci 0000:00:00.2: irqdomain: freeze msi 1 irq0
which then caused a NULL pointer dereference when __pci_restore_msi_state()
gets called by the system resume code.
So we need to figure out why we got irq number 0 after enabling
MSI for the AMD IOMMU device. The only hint I have is that the iommu driver just
grabs the PCI device without providing a PCI device driver for the IOMMU
PCI device; we have solved a similar case for the eata driver. So could you
please help by applying this debug patch to gather more info and send me
/proc/interrupts?
Thanks!
Gerry

Signed-off-by: Jiang Liu <jiang.liu@xxxxxxxxxxxxxxx>
---
arch/x86/kernel/apic/msi.c | 6 +++++-
drivers/iommu/amd_iommu_init.c | 8 ++++++++
drivers/pci/msi.c | 4 ++++
kernel/irq/msi.c | 1 +
4 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index 5f1feb6854af..050dcf25577c 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -71,6 +71,7 @@ int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
struct irq_domain *domain;
struct irq_alloc_info info;
+ int ret;

init_irq_alloc_info(&info, NULL);
info.type = X86_IRQ_ALLOC_TYPE_MSI;
@@ -82,7 +83,10 @@ int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
if (domain == NULL)
return -ENOSYS;

- return pci_msi_domain_alloc_irqs(domain, dev, nvec, type);
+ ret = pci_msi_domain_alloc_irqs(domain, dev, nvec, type);
+ dev_warn(&dev->dev, "irqdomain: domain %p, def_domain %p ret%d\n",
+ domain, msi_default_domain, ret);
+ return ret;
}

void native_teardown_msi_irq(unsigned int irq)
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 5ef347a13cb5..23cd4d861dba 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -1416,7 +1416,11 @@ static int iommu_setup_msi(struct amd_iommu *iommu)
{
int r;

+ dev_warn(&iommu->dev->dev, "irqdomain: before enabling MSI for msi%d, irq%d\n",
+ iommu->dev->msi_enabled, iommu->dev->irq);
r = pci_enable_msi(iommu->dev);
+ dev_warn(&iommu->dev->dev, "irqdomain: after enabling MSI for msi%d, irq%d\n",
+ iommu->dev->msi_enabled, iommu->dev->irq);
if (r)
return r;

@@ -1428,6 +1432,8 @@ static int iommu_setup_msi(struct amd_iommu *iommu)

if (r) {
pci_disable_msi(iommu->dev);
+ dev_warn(&iommu->dev->dev, "irqdomain: failed to enable MSI for msi%d, irq%d\n",
+ iommu->dev->msi_enabled, iommu->dev->irq);
return r;
}

@@ -1440,6 +1446,8 @@ static int iommu_init_msi(struct amd_iommu *iommu)
{
int ret;

+ dev_warn(&iommu->dev->dev, "irqdomain: init msi for iommu %p int_enabled%d\n",
+ iommu, iommu->int_enabled);
if (iommu->int_enabled)
goto enable_faults;

diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index d4497141d083..0301a18663b0 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -602,6 +602,8 @@ static int msi_capability_init(struct pci_dev *dev, int nvec)
int ret;
unsigned mask;

+ dev_warn(&dev->dev, "irqdomain: enable msi cap msi_enabled%d irq%d\n",
+ dev->msi_enabled, dev->irq);
pci_msi_set_enable(dev, 0); /* Disable MSI during set up */

entry = msi_setup_entry(dev, nvec);
@@ -643,6 +645,8 @@ static int msi_capability_init(struct pci_dev *dev, int nvec)

pcibios_free_irq(dev);
dev->irq = entry->irq;
+ dev_warn(&dev->dev, "irqdomain: succeed to enable msi cap msi_enabled%d irq%d\n",
+ dev->msi_enabled, dev->irq);
return 0;
}

diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 7e6512b9dc1f..535cf59bc5a7 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -298,6 +298,7 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
return ret;
}

+ dev_warn(dev, "irqdomain: allocated virq%d\n", virq);
for (i = 0; i < desc->nvec_used; i++)
irq_set_msi_desc_off(virq, i, desc);
}
--
1.7.10.4


--------------020904080203070301050609--
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/