[patch 08/26] x64, x2apic/intr-remap: Queued invalidation infrastructure (part of VT-d)

From: Suresh Siddha
Date: Thu Jul 10 2008 - 14:48:39 EST


Queued invalidation (part of Intel Virtualization Technology for
Directed I/O architecture) infrastructure.

This will be used for invalidating the interrupt entry cache in the
case of Interrupt-remapping and IOTLB invalidation in the case
of DMA-remapping.

Signed-off-by: Suresh Siddha <suresh.b.siddha@xxxxxxxxx>
---

Index: tree-x86/drivers/pci/dmar.c
===================================================================
--- tree-x86.orig/drivers/pci/dmar.c 2008-07-10 09:51:57.000000000 -0700
+++ tree-x86/drivers/pci/dmar.c 2008-07-10 09:52:01.000000000 -0700
@@ -28,6 +28,7 @@

#include <linux/pci.h>
#include <linux/dmar.h>
+#include <linux/timer.h>
#include "iova.h"
#include "intel-iommu.h"

@@ -509,3 +510,152 @@
iounmap(iommu->reg);
kfree(iommu);
}
+
+/*
+ * Reclaim all the submitted descriptors which have completed its work.
+ */
+static inline void reclaim_free_desc(struct q_inval *qi)
+{
+ while (qi->desc_status[qi->free_tail] == QI_DONE) {
+ qi->desc_status[qi->free_tail] = QI_FREE;
+ qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
+ qi->free_cnt++;
+ }
+}
+
+/*
+ * Submit the queued invalidation descriptor to the remapping
+ * hardware unit and wait for its completion.
+ */
+void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
+{
+ struct q_inval *qi = iommu->qi;
+ struct qi_desc *hw, wait_desc;
+ int wait_index, index;
+ unsigned long flags;
+
+ if (!qi)
+ return;
+
+ hw = qi->desc;
+
+ spin_lock(&qi->q_lock);
+ while (qi->free_cnt < 3) {
+ spin_unlock(&qi->q_lock);
+ cpu_relax();
+ spin_lock(&qi->q_lock);
+ }
+
+ index = qi->free_head;
+ wait_index = (index + 1) % QI_LENGTH;
+
+ qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
+
+ hw[index] = *desc;
+
+ wait_desc.low = QI_IWD_STATUS_DATA(2) | QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
+ wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
+
+ hw[wait_index] = wait_desc;
+
+ __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
+ __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
+
+ qi->free_head = (qi->free_head + 2) % QI_LENGTH;
+ qi->free_cnt -= 2;
+
+ spin_lock_irqsave(&iommu->register_lock, flags);
+ /*
+ * update the HW tail register indicating the presence of
+ * new descriptors.
+ */
+ writel(qi->free_head << 4, iommu->reg + DMAR_IQT_REG);
+ spin_unlock_irqrestore(&iommu->register_lock, flags);
+
+ while (qi->desc_status[wait_index] != QI_DONE) {
+ spin_unlock(&qi->q_lock);
+ cpu_relax();
+ spin_lock(&qi->q_lock);
+ }
+
+ qi->desc_status[index] = QI_DONE;
+
+ reclaim_free_desc(qi);
+ spin_unlock(&qi->q_lock);
+}
+
+/*
+ * Flush the global interrupt entry cache.
+ */
+void qi_global_iec(struct intel_iommu *iommu)
+{
+ struct qi_desc desc;
+
+ desc.low = QI_IEC_TYPE;
+ desc.high = 0;
+
+ qi_submit_sync(&desc, iommu);
+}
+
+/*
+ * Enable Queued Invalidation interface. This is a must to support
+ * interrupt-remapping. Also used by DMA-remapping, which replaces
+ * register based IOTLB invalidation.
+ */
+int dmar_enable_qi(struct intel_iommu *iommu)
+{
+ u32 cmd, sts;
+ unsigned long flags;
+ struct q_inval *qi;
+
+ if (!ecap_qis(iommu->ecap))
+ return -ENOENT;
+
+ /*
+ * queued invalidation is already setup and enabled.
+ */
+ if (iommu->qi)
+ return 0;
+
+ iommu->qi = kmalloc(sizeof(*qi), GFP_KERNEL);
+ if (!iommu->qi)
+ return -ENOMEM;
+
+ qi = iommu->qi;
+
+ qi->desc = (void *)(get_zeroed_page(GFP_KERNEL));
+ if (!qi->desc) {
+ kfree(qi);
+ iommu->qi = 0;
+ return -ENOMEM;
+ }
+
+ qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_KERNEL);
+ if (!qi->desc_status) {
+ free_page((unsigned long) qi->desc);
+ kfree(qi);
+ iommu->qi = 0;
+ return -ENOMEM;
+ }
+
+ qi->free_head = qi->free_tail = 0;
+ qi->free_cnt = QI_LENGTH;
+
+ spin_lock_init(&qi->q_lock);
+
+ spin_lock_irqsave(&iommu->register_lock, flags);
+ /* write zero to the tail reg */
+ writel(0, iommu->reg + DMAR_IQT_REG);
+
+ dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
+
+ cmd = iommu->gcmd | DMA_GCMD_QIE;
+ iommu->gcmd |= DMA_GCMD_QIE;
+ writel(cmd, iommu->reg + DMAR_GCMD_REG);
+
+ /* Make sure hardware complete it */
+ IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
+ spin_unlock_irqrestore(&iommu->register_lock, flags);
+
+ return 0;
+}
Index: tree-x86/drivers/pci/intel-iommu.c
===================================================================
--- tree-x86.orig/drivers/pci/intel-iommu.c 2008-07-10 09:51:59.000000000 -0700
+++ tree-x86/drivers/pci/intel-iommu.c 2008-07-10 09:52:01.000000000 -0700
@@ -181,13 +181,6 @@
kmem_cache_free(iommu_iova_cache, iova);
}

-static inline void __iommu_flush_cache(
- struct intel_iommu *iommu, void *addr, int size)
-{
- if (!ecap_coherent(iommu->ecap))
- clflush_cache_range(addr, size);
-}
-
/* Gets context entry for a given bus and devfn */
static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
u8 bus, u8 devfn)
Index: tree-x86/drivers/pci/intel-iommu.h
===================================================================
--- tree-x86.orig/drivers/pci/intel-iommu.h 2008-07-10 09:51:59.000000000 -0700
+++ tree-x86/drivers/pci/intel-iommu.h 2008-07-10 09:52:01.000000000 -0700
@@ -27,6 +27,7 @@
#include <linux/sysdev.h>
#include "iova.h"
#include <linux/io.h>
+#include <asm/cacheflush.h>
#include "dma_remapping.h"

/*
@@ -51,6 +52,10 @@
#define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */
#define DMAR_PHMBASE_REG 0x70 /* pmrr high base addr */
#define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */
+#define DMAR_IQH_REG 0x80 /* Invalidation queue head register */
+#define DMAR_IQT_REG 0x88 /* Invalidation queue tail register */
+#define DMAR_IQA_REG 0x90 /* Invalidation queue addr register */
+#define DMAR_ICS_REG 0x98 /* Invalidation complete status register */

#define OFFSET_STRIDE (9)
/*
@@ -114,6 +119,7 @@
#define ecap_max_iotlb_offset(e) \
(ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16)
#define ecap_coherent(e) ((e) & 0x1)
+#define ecap_qis(e) ((e) & 0x2)
#define ecap_eim_support(e) ((e >> 4) & 0x1)
#define ecap_ir_support(e) ((e >> 3) & 0x1)

@@ -131,6 +137,17 @@
#define DMA_TLB_IH_NONLEAF (((u64)1) << 6)
#define DMA_TLB_MAX_SIZE (0x3f)

+/* INVALID_DESC */
+#define DMA_ID_TLB_GLOBAL_FLUSH (((u64)1) << 3)
+#define DMA_ID_TLB_DSI_FLUSH (((u64)2) << 3)
+#define DMA_ID_TLB_PSI_FLUSH (((u64)3) << 3)
+#define DMA_ID_TLB_READ_DRAIN (((u64)1) << 7)
+#define DMA_ID_TLB_WRITE_DRAIN (((u64)1) << 6)
+#define DMA_ID_TLB_DID(id) (((u64)((id & 0xffff) << 16)))
+#define DMA_ID_TLB_IH_NONLEAF (((u64)1) << 6)
+#define DMA_ID_TLB_ADDR(addr) (addr)
+#define DMA_ID_TLB_ADDR_MASK(mask) (mask)
+
/* PMEN_REG */
#define DMA_PMEN_EPM (((u32)1)<<31)
#define DMA_PMEN_PRS (((u32)1)<<0)
@@ -140,6 +157,7 @@
#define DMA_GCMD_SRTP (((u32)1) << 30)
#define DMA_GCMD_SFL (((u32)1) << 29)
#define DMA_GCMD_EAFL (((u32)1) << 28)
+#define DMA_GCMD_QIE (((u32)1) << 26)
#define DMA_GCMD_WBF (((u32)1) << 27)

/* GSTS_REG */
@@ -147,6 +165,7 @@
#define DMA_GSTS_RTPS (((u32)1) << 30)
#define DMA_GSTS_FLS (((u32)1) << 29)
#define DMA_GSTS_AFLS (((u32)1) << 28)
+#define DMA_GSTS_QIES (((u32)1) << 26)
#define DMA_GSTS_WBFS (((u32)1) << 27)

/* CCMD_REG */
@@ -192,6 +211,40 @@
}\
}

+#define QI_LENGTH 256 /* queue length */
+
+enum {
+ QI_FREE,
+ QI_IN_USE,
+ QI_DONE
+};
+
+#define QI_CC_TYPE 0x1
+#define QI_IOTLB_TYPE 0x2
+#define QI_DIOTLB_TYPE 0x3
+#define QI_IEC_TYPE 0x4
+#define QI_IWD_TYPE 0x5
+
+#define QI_IEC_SELECTIVE (((u64)1) << 4)
+#define QI_IEC_IIDEX(idx) (((u64)(idx & 0xffff) << 32))
+#define QI_IEC_IM(m) (((u64)(m & 0x1f) << 27))
+
+#define QI_IWD_STATUS_DATA(d) (((u64)d) << 32)
+#define QI_IWD_STATUS_WRITE (((u64)1) << 5)
+
+struct qi_desc {
+ u64 low, high;
+};
+
+struct q_inval {
+ spinlock_t q_lock;
+ struct qi_desc *desc; /* invalidation queue */
+ int *desc_status; /* desc status */
+ int free_head; /* first free entry */
+ int free_tail; /* last free entry */
+ int free_cnt;
+};
+
struct intel_iommu {
void __iomem *reg; /* Pointer to hardware regs, virtual addr */
u64 cap;
@@ -212,8 +265,16 @@
struct msi_msg saved_msg;
struct sys_device sysdev;
#endif
+ struct q_inval *qi; /* Queued invalidation info */
};

+static inline void __iommu_flush_cache(
+ struct intel_iommu *iommu, void *addr, int size)
+{
+ if (!ecap_coherent(iommu->ecap))
+ clflush_cache_range(addr, size);
+}
+
extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev);

extern int alloc_iommu(struct dmar_drhd_unit *drhd);

--

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/