[PATCH 09/13] iommu/intel: Update to use PCI DMA aliases

From: Alex Williamson
Date: Thu May 01 2014 - 12:29:33 EST


VT-d code currently makes use of pci_find_upstream_pcie_bridge() in
order to find the topology based alias of a device. This function has
a few problems. First, it doesn't check the entire alias path of the
device to the root bus, therefore if a PCIe device is masked upstream,
the wrong result is produced. Also, it's known to get confused and
give up when it crosses a bridge from a conventional PCI bus to a PCIe
bus that lacks a PCIe capability. The PCI-core provided DMA alias
support solves both of these problems and additionally adds support
for DMA function quirks allowing VT-d to work with devices like
Marvell and Ricoh with known broken requester IDs.

Signed-off-by: Alex Williamson <alex.williamson@xxxxxxxxxx>
Cc: David Woodhouse <David.Woodhouse@xxxxxxxxx>
---
drivers/iommu/intel-iommu.c | 222 ++++++++++++++++-------------------
drivers/iommu/intel_irq_remapping.c | 55 ++++++---
2 files changed, 139 insertions(+), 138 deletions(-)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index f0e50f1..d87b3c9 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -1840,54 +1840,56 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
return 0;
}

+struct domain_context_mapping_data {
+ struct dmar_domain *domain;
+ struct intel_iommu *iommu;
+ int translation;
+};
+
+static int domain_context_mapping_cb(struct pci_dev *pdev,
+ u16 alias, void *opaque)
+{
+ struct domain_context_mapping_data *data = opaque;
+
+ return domain_context_mapping_one(data->domain, data->iommu,
+ PCI_BUS_NUM(alias), alias & 0xff,
+ data->translation);
+}
+
static int
domain_context_mapping(struct dmar_domain *domain, struct device *dev,
int translation)
{
- int ret;
- struct pci_dev *pdev, *tmp, *parent;
struct intel_iommu *iommu;
u8 bus, devfn;
+ struct domain_context_mapping_data data;

iommu = device_to_iommu(dev, &bus, &devfn);
if (!iommu)
return -ENODEV;

- ret = domain_context_mapping_one(domain, iommu, bus, devfn,
- translation);
- if (ret || !dev_is_pci(dev))
- return ret;
-
- /* dependent device mapping */
- pdev = to_pci_dev(dev);
- tmp = pci_find_upstream_pcie_bridge(pdev);
- if (!tmp)
- return 0;
- /* Secondary interface's bus number and devfn 0 */
- parent = pdev->bus->self;
- while (parent != tmp) {
- ret = domain_context_mapping_one(domain, iommu,
- parent->bus->number,
- parent->devfn, translation);
- if (ret)
- return ret;
- parent = parent->bus->self;
- }
- if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
- return domain_context_mapping_one(domain, iommu,
- tmp->subordinate->number, 0,
- translation);
- else /* this is a legacy PCI bridge */
- return domain_context_mapping_one(domain, iommu,
- tmp->bus->number,
- tmp->devfn,
+ if (!dev_is_pci(dev))
+ return domain_context_mapping_one(domain, iommu, bus, devfn,
translation);
+
+ data.domain = domain;
+ data.iommu = iommu;
+ data.translation = translation;
+
+ return pci_for_each_dma_alias(to_pci_dev(dev),
+ &domain_context_mapping_cb, &data);
+}
+
+static int domain_context_mapped_cb(struct pci_dev *pdev,
+ u16 alias, void *opaque)
+{
+ struct intel_iommu *iommu = opaque;
+
+ return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
}

static int domain_context_mapped(struct device *dev)
{
- int ret;
- struct pci_dev *pdev, *tmp, *parent;
struct intel_iommu *iommu;
u8 bus, devfn;

@@ -1895,30 +1897,11 @@ static int domain_context_mapped(struct device *dev)
if (!iommu)
return -ENODEV;

- ret = device_context_mapped(iommu, bus, devfn);
- if (!ret || !dev_is_pci(dev))
- return ret;
+ if (dev_is_pci(dev))
+ return device_context_mapped(iommu, bus, devfn);

- /* dependent device mapping */
- pdev = to_pci_dev(dev);
- tmp = pci_find_upstream_pcie_bridge(pdev);
- if (!tmp)
- return ret;
- /* Secondary interface's bus number and devfn 0 */
- parent = pdev->bus->self;
- while (parent != tmp) {
- ret = device_context_mapped(iommu, parent->bus->number,
- parent->devfn);
- if (!ret)
- return ret;
- parent = parent->bus->self;
- }
- if (pci_is_pcie(tmp))
- return device_context_mapped(iommu, tmp->subordinate->number,
- 0);
- else
- return device_context_mapped(iommu, tmp->bus->number,
- tmp->devfn);
+ return !pci_for_each_dma_alias(to_pci_dev(dev),
+ domain_context_mapped_cb, iommu);
}

/* Returns a number of VTD pages, but aligned to MM page size */
@@ -2207,79 +2190,86 @@ static struct dmar_domain *dmar_insert_dev_info(struct intel_iommu *iommu,
return domain;
}

+static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
+{
+ *(u16 *)opaque = alias;
+ return 0;
+}
+
/* domain is initialized */
static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
{
- struct dmar_domain *domain, *free = NULL;
- struct intel_iommu *iommu = NULL;
+ struct dmar_domain *domain, *tmp;
+ struct intel_iommu *iommu;
struct device_domain_info *info;
- struct pci_dev *dev_tmp = NULL;
+ u16 dma_alias;
unsigned long flags;
- u8 bus, devfn, bridge_bus, bridge_devfn;
+ u8 bus, devfn;

domain = find_domain(dev);
if (domain)
return domain;

+ iommu = device_to_iommu(dev, &bus, &devfn);
+ if (!iommu)
+ return NULL;
+
if (dev_is_pci(dev)) {
struct pci_dev *pdev = to_pci_dev(dev);
- u16 segment;

- segment = pci_domain_nr(pdev->bus);
- dev_tmp = pci_find_upstream_pcie_bridge(pdev);
- if (dev_tmp) {
- if (pci_is_pcie(dev_tmp)) {
- bridge_bus = dev_tmp->subordinate->number;
- bridge_devfn = 0;
- } else {
- bridge_bus = dev_tmp->bus->number;
- bridge_devfn = dev_tmp->devfn;
- }
- spin_lock_irqsave(&device_domain_lock, flags);
- info = dmar_search_domain_by_dev_info(segment,
- bridge_bus,
- bridge_devfn);
- if (info) {
- iommu = info->iommu;
- domain = info->domain;
- }
- spin_unlock_irqrestore(&device_domain_lock, flags);
- /* pcie-pci bridge already has a domain, uses it */
- if (info)
- goto found_domain;
+ pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
+
+ spin_lock_irqsave(&device_domain_lock, flags);
+ info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
+ PCI_BUS_NUM(dma_alias),
+ dma_alias & 0xff);
+ if (info) {
+ iommu = info->iommu;
+ domain = info->domain;
}
- }
+ spin_unlock_irqrestore(&device_domain_lock, flags);

- iommu = device_to_iommu(dev, &bus, &devfn);
- if (!iommu)
- goto error;
+ /* DMA alias already has a domain, uses it */
+ if (info)
+ goto found_domain;
+ }

/* Allocate and initialize new domain for the device */
domain = alloc_domain(false);
if (!domain)
- goto error;
+ return NULL;
+
if (iommu_attach_domain(domain, iommu)) {
free_domain_mem(domain);
- domain = NULL;
- goto error;
+ return NULL;
}
- free = domain;
- if (domain_init(domain, gaw))
- goto error;

- /* register pcie-to-pci device */
- if (dev_tmp) {
- domain = dmar_insert_dev_info(iommu, bridge_bus, bridge_devfn,
- NULL, domain);
+ if (domain_init(domain, gaw)) {
+ domain_exit(domain);
+ return NULL;
+ }
+
+ /* register PCI DMA alias device */
+ if (dev_is_pci(dev)) {
+ tmp = dmar_insert_dev_info(iommu, PCI_BUS_NUM(dma_alias),
+ dma_alias & 0xff, NULL, domain);
+
+ if (!tmp || tmp != domain) {
+ domain_exit(domain);
+ domain = tmp;
+ }
+
if (!domain)
- goto error;
+ return NULL;
}

found_domain:
- domain = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
-error:
- if (free != domain)
- domain_exit(free);
+ tmp = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
+
+ if (!tmp || tmp != domain) {
+ domain_exit(domain);
+ domain = tmp;
+ }

return domain;
}
@@ -4029,33 +4019,21 @@ out_free_dmar:
return ret;
}

+static int iommu_detach_dev_cb(struct pci_dev *pdev, u16 alias, void *opaque)
+{
+ struct intel_iommu *iommu = opaque;
+
+ iommu_detach_dev(iommu, PCI_BUS_NUM(alias), alias & 0xff);
+ return 0;
+}
+
static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
struct device *dev)
{
- struct pci_dev *tmp, *parent, *pdev;
-
if (!iommu || !dev || !dev_is_pci(dev))
return;

- pdev = to_pci_dev(dev);
-
- /* dependent device detach */
- tmp = pci_find_upstream_pcie_bridge(pdev);
- /* Secondary interface's bus number and devfn 0 */
- if (tmp) {
- parent = pdev->bus->self;
- while (parent != tmp) {
- iommu_detach_dev(iommu, parent->bus->number,
- parent->devfn);
- parent = parent->bus->self;
- }
- if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
- iommu_detach_dev(iommu,
- tmp->subordinate->number, 0);
- else /* this is a legacy PCI bridge */
- iommu_detach_dev(iommu, tmp->bus->number,
- tmp->devfn);
- }
+ pci_for_each_dma_alias(to_pci_dev(dev), &iommu_detach_dev_cb, iommu);
}

static void domain_remove_one_dev_info(struct dmar_domain *domain,
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index 9b17489..29ded8f 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -369,29 +369,52 @@ static int set_hpet_sid(struct irte *irte, u8 id)
return 0;
}

+struct set_msi_sid_data {
+ struct pci_dev *pdev;
+ u16 alias;
+};
+
+static int set_msi_sid_cb(struct pci_dev *pdev, u16 alias, void *opaque)
+{
+ struct set_msi_sid_data *data = opaque;
+
+ data->pdev = pdev;
+ data->alias = alias;
+
+ return 0;
+}
+
static int set_msi_sid(struct irte *irte, struct pci_dev *dev)
{
- struct pci_dev *bridge;
+ struct set_msi_sid_data data;

if (!irte || !dev)
return -1;

- /* PCIe device or Root Complex integrated PCI device */
- if (pci_is_pcie(dev) || !dev->bus->parent) {
- set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16,
- (dev->bus->number << 8) | dev->devfn);
- return 0;
- }
+ pci_for_each_dma_alias(dev, set_msi_sid_cb, &data);

- bridge = pci_find_upstream_pcie_bridge(dev);
- if (bridge) {
- if (pci_is_pcie(bridge))/* this is a PCIe-to-PCI/PCIX bridge */
- set_irte_sid(irte, SVT_VERIFY_BUS, SQ_ALL_16,
- (bridge->bus->number << 8) | dev->bus->number);
- else /* this is a legacy PCI bridge */
- set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16,
- (bridge->bus->number << 8) | bridge->devfn);
- }
+ /*
+ * DMA alias provides us with a PCI device and alias. The only case
+ * where the it will return an alias on a different bus than the
+ * device is the case of a PCIe-to-PCI bridge, where the alias is for
+ * the subordinate bus. In this case we can only verify the bus.
+ *
+ * If the alias device is on a different bus than our source device
+ * then we have a topology based alias, use it.
+ *
+ * Otherwise, the alias is for a device DMA quirk and we cannot
+ * assume that MSI uses the same requester ID. Therefore use the
+ * original device.
+ */
+ if (PCI_BUS_NUM(data.alias) != data.pdev->bus->number)
+ set_irte_sid(irte, SVT_VERIFY_BUS, SQ_ALL_16,
+ PCI_DEVID(PCI_BUS_NUM(data.alias),
+ PCI_BUS_NUM(data.alias)));
+ else if (data.pdev->bus->number != dev->bus->number)
+ set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16, data.alias);
+ else
+ set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16,
+ PCI_DEVID(dev->bus->number, dev->devfn));

return 0;
}

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/