Re: [PATCH v2] vfio: fix potential deadlock on vfio group lock

From: Alex Williamson
Date: Thu Jan 12 2023 - 16:26:55 EST


On Thu, 12 Jan 2023 15:38:44 -0500
Matthew Rosato <mjrosato@xxxxxxxxxxxxx> wrote:

> Currently it is possible that the final put of a KVM reference comes from
> vfio during its device close operation. This occurs while the vfio group
> lock is held; however, if the vfio device is still in the kvm device list,
> then the following call chain could result in a deadlock:
>
> kvm_put_kvm
> -> kvm_destroy_vm
> -> kvm_destroy_devices
> -> kvm_vfio_destroy
> -> kvm_vfio_file_set_kvm
> -> vfio_file_set_kvm
> -> group->group_lock/group_rwsem
>
> Avoid this scenario by having vfio core code acquire a KVM reference
> the first time a device is opened and hold that reference until the
> device fd is closed, at a point after the group lock has been released.
>
> Fixes: 421cfe6596f6 ("vfio: remove VFIO_GROUP_NOTIFY_SET_KVM")
> Reported-by: Alex Williamson <alex.williamson@xxxxxxxxxx>
> Signed-off-by: Matthew Rosato <mjrosato@xxxxxxxxxxxxx>
> ---
> Changes from v1:
> * Re-write using symbol get logic to get kvm ref during first device
> open, release the ref during device fd close after group lock is
> released
> * Drop kvm get/put changes to drivers; now that vfio core holds a
> kvm ref until sometime after the device_close op is called, it
> should be fine for drivers to get and put their own references to it.
> ---
> drivers/vfio/group.c | 6 ++---
> drivers/vfio/vfio_main.c | 48 +++++++++++++++++++++++++++++++++++++---
> include/linux/vfio.h | 1 -
> 3 files changed, 48 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/vfio/group.c b/drivers/vfio/group.c
> index bb24b2f0271e..2b0da82f82f4 100644
> --- a/drivers/vfio/group.c
> +++ b/drivers/vfio/group.c
> @@ -165,9 +165,9 @@ static int vfio_device_group_open(struct vfio_device *device)
> }
>
> /*
> - * Here we pass the KVM pointer with the group under the lock. If the
> - * device driver will use it, it must obtain a reference and release it
> - * during close_device.
> + * Here we pass the KVM pointer with the group under the lock. A
> + * reference will be obtained the first time the device is opened and
> + * will be held until the device fd is closed.
> */
> ret = vfio_device_open(device, device->group->iommufd,
> device->group->kvm);
> diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
> index 5177bb061b17..c969e2a0ecd3 100644
> --- a/drivers/vfio/vfio_main.c
> +++ b/drivers/vfio/vfio_main.c
> @@ -16,6 +16,7 @@
> #include <linux/fs.h>
> #include <linux/idr.h>
> #include <linux/iommu.h>
> +#include <linux/kvm_host.h>
> #include <linux/list.h>
> #include <linux/miscdevice.h>
> #include <linux/module.h>
> @@ -344,6 +345,35 @@ static bool vfio_assert_device_open(struct vfio_device *device)
> return !WARN_ON_ONCE(!READ_ONCE(device->open_count));
> }
>
> +static bool vfio_kvm_get_kvm_safe(struct kvm *kvm)
> +{
> + bool (*fn)(struct kvm *kvm);
> + bool ret;
> +
> + fn = symbol_get(kvm_get_kvm_safe);
> + if (WARN_ON(!fn))
> + return false;
> +
> + ret = fn(kvm);
> +
> + symbol_put(kvm_get_kvm_safe);
> +
> + return ret;
> +}
> +
> +static void vfio_kvm_put_kvm(struct kvm *kvm)
> +{
> + void (*fn)(struct kvm *kvm);
> +
> + fn = symbol_get(kvm_put_kvm);
> + if (WARN_ON(!fn))
> + return;
> +
> + fn(kvm);
> +
> + symbol_put(kvm_put_kvm);
> +}
> +
> static int vfio_device_first_open(struct vfio_device *device,
> struct iommufd_ctx *iommufd, struct kvm *kvm)
> {
> @@ -361,16 +391,24 @@ static int vfio_device_first_open(struct vfio_device *device,
> if (ret)
> goto err_module_put;
>
> + if (kvm && !vfio_kvm_get_kvm_safe(kvm)) {
> + ret = -ENOENT;
> + goto err_unuse_iommu;
> + }
> device->kvm = kvm;

This could just as easily be:

if (kvm && vfio_kvm_get_kvm_safe(kvm))
device->kvm = kvm;

Right? The error path would test device->kvm and we already use
device->kvm in the release path.

Otherwise, in the off chance userspace hits this error, what's the
value in generating a failure here for a device that may or may not
have a kvm dependency. A driver with a dependency should fail if
device->kvm is NULL.

> if (device->ops->open_device) {
> ret = device->ops->open_device(device);
> if (ret)
> - goto err_unuse_iommu;
> + goto err_put_kvm;
> }
> return 0;
>
> +err_put_kvm:
> + if (kvm) {
> + vfio_kvm_put_kvm(kvm);
> + device->kvm = NULL;
> + }
> err_unuse_iommu:
> - device->kvm = NULL;
> if (iommufd)
> vfio_iommufd_unbind(device);
> else
> @@ -387,7 +425,6 @@ static void vfio_device_last_close(struct vfio_device *device,
>
> if (device->ops->close_device)
> device->ops->close_device(device);
> - device->kvm = NULL;
> if (iommufd)
> vfio_iommufd_unbind(device);
> else
> @@ -465,6 +502,11 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep)
>
> vfio_device_group_close(device);
>
> + if (device->open_count == 0 && device->kvm) {
> + vfio_kvm_put_kvm(device->kvm);
> + device->kvm = NULL;
> + }

IIUC, device->open_count is protected by device->dev_set->lock. Thanks,

Alex

> +
> vfio_device_put_registration(device);
>
> return 0;
> diff --git a/include/linux/vfio.h b/include/linux/vfio.h
> index 35be78e9ae57..3ff7e9302cc1 100644
> --- a/include/linux/vfio.h
> +++ b/include/linux/vfio.h
> @@ -46,7 +46,6 @@ struct vfio_device {
> struct vfio_device_set *dev_set;
> struct list_head dev_set_list;
> unsigned int migration_flags;
> - /* Driver must reference the kvm during open_device or never touch it */
> struct kvm *kvm;
>
> /* Members below here are private, not for driver use */