[RFC][PATCH v2 07/13] iommu/arm-smmu-v3: Add shared VMID support for NESTING

From: Nicolin Chen
Date: Mon Aug 30 2021 - 23:08:03 EST


A VMID can be shared among iommu domains being attached to the same
Virtual Machine in order to improve utilization of TLB cache.

This patch implements ->set_nesting_vmid() and ->get_nesting_vmid()
to set/get s2_cfg->vmid for nesting cases, and then changes to reuse
the VMID.

Signed-off-by: Nicolin Chen <nicolinc@xxxxxxxxxx>
---
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 65 +++++++++++++++++++--
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 1 +
2 files changed, 60 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index a388e318f86e..c0ae117711fa 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -2051,7 +2051,7 @@ static void arm_smmu_domain_free(struct iommu_domain *domain)
mutex_unlock(&arm_smmu_asid_lock);
} else {
struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
- if (cfg->vmid)
+ if (cfg->vmid && !atomic_dec_return(&smmu->vmid_refcnts[cfg->vmid]))
arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
}

@@ -2121,17 +2121,28 @@ static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
struct arm_smmu_master *master,
struct io_pgtable_cfg *pgtbl_cfg)
{
- int vmid;
struct arm_smmu_device *smmu = smmu_domain->smmu;
struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;

- vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
- if (vmid < 0)
- return vmid;
+ /*
+ * For a nested case where there are multiple passthrough devices to a
+ * VM, they share a commond VMID, allocated when the first passthrough
+ * device is attached to the VM. So the cfg->vmid might be already set
+ * in arm_smmu_set_nesting_vmid(), reported from the hypervisor. In this
+ * case, simply reuse the shared VMID and increase its refcount.
+ */
+ if (!cfg->vmid) {
+ int vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
+
+ if (vmid < 0)
+ return vmid;
+ cfg->vmid = (u16)vmid;
+ }
+
+ atomic_inc(&smmu->vmid_refcnts[cfg->vmid]);

vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
- cfg->vmid = (u16)vmid;
cfg->vttbr = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
cfg->vtcr = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
@@ -2731,6 +2742,44 @@ static int arm_smmu_enable_nesting(struct iommu_domain *domain)
return ret;
}

+static int arm_smmu_set_nesting_vmid(struct iommu_domain *domain, u32 vmid)
+{
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+ struct arm_smmu_s2_cfg *s2_cfg = &smmu_domain->s2_cfg;
+ int ret = 0;
+
+ if (vmid == IOMMU_VMID_INVALID)
+ return -EINVAL;
+
+ mutex_lock(&smmu_domain->init_mutex);
+ if (smmu_domain->smmu || smmu_domain->stage != ARM_SMMU_DOMAIN_NESTED)
+ ret = -EPERM;
+ else
+ s2_cfg->vmid = vmid;
+ mutex_unlock(&smmu_domain->init_mutex);
+
+ return ret;
+}
+
+static int arm_smmu_get_nesting_vmid(struct iommu_domain *domain, u32 *vmid)
+{
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+ struct arm_smmu_s2_cfg *s2_cfg = &smmu_domain->s2_cfg;
+ int ret = 0;
+
+ if (!vmid)
+ return -EINVAL;
+
+ mutex_lock(&smmu_domain->init_mutex);
+ if (smmu_domain->smmu || smmu_domain->stage != ARM_SMMU_DOMAIN_NESTED)
+ ret = -EPERM;
+ else
+ *vmid = s2_cfg->vmid;
+ mutex_unlock(&smmu_domain->init_mutex);
+
+ return ret;
+}
+
static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
{
return iommu_fwspec_add_ids(dev, args->args, 1);
@@ -2845,6 +2894,8 @@ static struct iommu_ops arm_smmu_ops = {
.release_device = arm_smmu_release_device,
.device_group = arm_smmu_device_group,
.enable_nesting = arm_smmu_enable_nesting,
+ .set_nesting_vmid = arm_smmu_set_nesting_vmid,
+ .get_nesting_vmid = arm_smmu_get_nesting_vmid,
.of_xlate = arm_smmu_of_xlate,
.get_resv_regions = arm_smmu_get_resv_regions,
.put_resv_regions = generic_iommu_put_resv_regions,
@@ -3530,6 +3581,8 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
/* ASID/VMID sizes */
smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
+ smmu->vmid_refcnts = devm_kcalloc(smmu->dev, 1 << smmu->vmid_bits,
+ sizeof(*smmu->vmid_refcnts), GFP_KERNEL);

/* IDR1 */
reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 4cb136f07914..ea2c61d52df8 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -664,6 +664,7 @@ struct arm_smmu_device {
#define ARM_SMMU_MAX_VMIDS (1 << 16)
unsigned int vmid_bits;
DECLARE_BITMAP(vmid_map, ARM_SMMU_MAX_VMIDS);
+ atomic_t *vmid_refcnts;

unsigned int ssid_bits;
unsigned int sid_bits;
--
2.17.1