[PATCH v2 5/6] iommu: Replace device_lock() with group->mutex

From: Lu Baolu
Date: Fri Feb 17 2023 - 04:56:40 EST


device_lock() was used in iommu_group_store_type() to prevent the
devices in an iommu group from being attached by any device driver.
However, to avoid a lock-ordering deadlock between group->mutex and
device_lock(), changing the default domain type was limited to
singleton groups.

We already have the DMA ownership scheme to avoid driver attachment,
and group->mutex ensures that device ops are always valid, so there's
no need for device_lock() anymore. Remove device_lock() and the
singleton group limitation.

Signed-off-by: Lu Baolu <baolu.lu@xxxxxxxxxxxxxxx>
---
drivers/iommu/iommu.c | 78 +++++++++----------------------------------
1 file changed, 16 insertions(+), 62 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index e1ae1eb4faf0..18dac155a178 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2958,14 +2958,6 @@ static int iommu_change_dev_def_domain(struct iommu_group *group,
goto out;
}

- /* We can bring up a flush queue without tearing down the domain */
- if (type == IOMMU_DOMAIN_DMA_FQ && prev_dom->type == IOMMU_DOMAIN_DMA) {
- ret = iommu_dma_init_fq(prev_dom);
- if (!ret)
- prev_dom->type = IOMMU_DOMAIN_DMA_FQ;
- goto out;
- }
-
/* Sets group->default_domain to the newly allocated domain */
ret = iommu_group_alloc_default_domain(group, dev, type);
if (ret)
@@ -2998,7 +2990,7 @@ static int iommu_change_dev_def_domain(struct iommu_group *group,
* transition. Return failure if this isn't met.
*
* We need to consider the race between this and the device release path.
- * device_lock(dev) is used here to guarantee that the device release path
+ * group->mutex is used here to guarantee that the device release path
* will not be entered at the same time.
*/
static ssize_t iommu_group_store_type(struct iommu_group *group,
@@ -3024,60 +3016,27 @@ static ssize_t iommu_group_store_type(struct iommu_group *group,
else
return -EINVAL;

- /*
- * Lock/Unlock the group mutex here before device lock to
- * 1. Make sure that the iommu group has only one device (this is a
- * prerequisite for step 2)
- * 2. Get struct *dev which is needed to lock device
- */
mutex_lock(&group->mutex);
- if (iommu_group_device_count(group) != 1) {
+ /* We can bring up a flush queue without tearing down the domain. */
+ if (req_type == IOMMU_DOMAIN_DMA_FQ &&
+ group->default_domain->type == IOMMU_DOMAIN_DMA) {
+ ret = iommu_dma_init_fq(group->default_domain);
+ if (!ret)
+ group->default_domain->type = IOMMU_DOMAIN_DMA_FQ;
mutex_unlock(&group->mutex);
- pr_err_ratelimited("Cannot change default domain: Group has more than one device\n");
- return -EINVAL;
+
+ return ret ?: count;
}

- /* Since group has only one device */
+ /* Otherwise, ensure that device exists and no driver is bound. */
dev = iommu_group_first_dev(group);
- get_device(dev);
-
- /*
- * Don't hold the group mutex because taking group mutex first and then
- * the device lock could potentially cause a deadlock as below. Assume
- * two threads T1 and T2. T1 is trying to change default domain of an
- * iommu group and T2 is trying to hot unplug a device or release [1] VF
- * of a PCIe device which is in the same iommu group. T1 takes group
- * mutex and before it could take device lock assume T2 has taken device
- * lock and is yet to take group mutex. Now, both the threads will be
- * waiting for the other thread to release lock. Below, lock order was
- * suggested.
- * device_lock(dev);
- * mutex_lock(&group->mutex);
- * iommu_change_dev_def_domain();
- * mutex_unlock(&group->mutex);
- * device_unlock(dev);
- *
- * [1] Typical device release path
- * device_lock() from device/driver core code
- * -> bus_notifier()
- * -> iommu_bus_notifier()
- * -> iommu_release_device()
- * -> ops->release_device() vendor driver calls back iommu core code
- * -> mutex_lock() from iommu core code
- */
- mutex_unlock(&group->mutex);
-
- /* Check if the device in the group still has a driver bound to it */
- device_lock(dev);
- if (device_is_bound(dev) && !(req_type == IOMMU_DOMAIN_DMA_FQ &&
- group->default_domain->type == IOMMU_DOMAIN_DMA)) {
- pr_err_ratelimited("Device is still bound to driver\n");
- ret = -EBUSY;
- goto out;
+ if (!dev || group->owner_cnt) {
+ mutex_unlock(&group->mutex);
+ return -EPERM;
}

- mutex_lock(&group->mutex);
ret = iommu_change_dev_def_domain(group, dev, req_type);
+
/*
* Release the mutex here because ops->probe_finalize() call-back of
* some vendor IOMMU drivers calls arm_iommu_attach_device() which
@@ -3088,14 +3047,9 @@ static ssize_t iommu_group_store_type(struct iommu_group *group,

/* Make sure dma_ops is appropriatley set */
if (!ret)
- iommu_group_do_probe_finalize(dev, group->default_domain);
- ret = ret ?: count;
-
-out:
- device_unlock(dev);
- put_device(dev);
+ __iommu_group_dma_finalize(group);

- return ret;
+ return ret ?: count;
}

static bool iommu_is_default_domain(struct iommu_group *group)
--
2.34.1