Re: [PATCH v11 05/22] vfio iommu: Added pin and unpin callback functions to vfio_iommu_driver_ops

From: Alex Williamson
Date: Mon Nov 07 2016 - 14:36:57 EST


On Sat, 5 Nov 2016 02:40:39 +0530
Kirti Wankhede <kwankhede@xxxxxxxxxx> wrote:

> Added two new callback functions to struct vfio_iommu_driver_ops. Backend
> IOMMU module that supports pining and unpinning pages for mdev devices
> should provide these functions.
> Added APIs for pining and unpining pages to VFIO module. These calls back
> into backend iommu module to actually pin and unpin pages.
> Renamed static functions in vfio_type1_iommu.c to resolve conflicts
>
> Signed-off-by: Kirti Wankhede <kwankhede@xxxxxxxxxx>
> Signed-off-by: Neo Jia <cjia@xxxxxxxxxx>
> Change-Id: Ia7417723aaae86bec2959ad9ae6c2915ddd340e0
> ---
> drivers/vfio/vfio.c | 96 +++++++++++++++++++++++++++++++++++++++++
> drivers/vfio/vfio_iommu_type1.c | 20 ++++-----
> include/linux/vfio.h | 14 +++++-
> 3 files changed, 119 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
> index 2e83bdf007fe..76d260e98930 100644
> --- a/drivers/vfio/vfio.c
> +++ b/drivers/vfio/vfio.c
> @@ -1799,6 +1799,102 @@ void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
> }
> EXPORT_SYMBOL_GPL(vfio_info_cap_shift);
>
> +
> +/*
> + * Pin a set of guest PFNs and return their associated host PFNs for local
> + * domain only.
> + * @dev [in] : device
> + * @user_pfn [in]: array of user/guest PFNs
> + * @npage [in]: count of array elements
> + * @prot [in] : protection flags
> + * @phys_pfn[out] : array of host PFNs
> + * Return error or number of pages pinned.
> + */
> +int vfio_pin_pages(struct device *dev, unsigned long *user_pfn,
> + int npage, int prot, unsigned long *phys_pfn)
> +{
> + struct vfio_container *container;
> + struct vfio_group *group;
> + struct vfio_iommu_driver *driver;
> + int ret;
> +
> + if (!dev || !user_pfn || !phys_pfn)
> + return -EINVAL;
> +
> + group = vfio_group_get_from_dev(dev);
> + if (IS_ERR(group))
> + return PTR_ERR(group);
> +
> + ret = vfio_group_add_container_user(group);
> + if (ret)
> + goto err_pin_pages;
> +
> + container = group->container;
> + down_read(&container->group_lock);
> +
> + driver = container->iommu_driver;
> + if (likely(driver && driver->ops->pin_pages))
> + ret = driver->ops->pin_pages(container->iommu_data, user_pfn,
> + npage, prot, phys_pfn);
> + else
> + ret = -EINVAL;

-ENOTTY might be a more appropriate error return here and below since
we're not signaling invalid argument, we're signaling lack of support.

> +
> + up_read(&container->group_lock);
> + vfio_group_try_dissolve_container(group);
> +
> +err_pin_pages:
> + vfio_group_put(group);
> + return ret;
> +

Unnecessary extra line

> +}
> +EXPORT_SYMBOL(vfio_pin_pages);
> +
> +/*
> + * Unpin set of host PFNs for local domain only.
> + * @dev [in] : device
> + * @user_pfn [in]: array of user/guest PFNs to be unpinned
> + * @pfn [in] : array of host PFNs to be unpinned.
> + * @npage [in] :count of elements in array, that is number of pages.
> + * Return error or number of pages unpinned.
> + */
> +int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn,
> + unsigned long *pfn, int npage)
> +{
> + struct vfio_container *container;
> + struct vfio_group *group;
> + struct vfio_iommu_driver *driver;
> + int ret;
> +
> + if (!dev || !pfn)
> + return -EINVAL;
> +
> + group = vfio_group_get_from_dev(dev);
> + if (IS_ERR(group))
> + return PTR_ERR(group);
> +
> + ret = vfio_group_add_container_user(group);
> + if (ret)
> + goto err_unpin_pages;
> +
> + container = group->container;
> + down_read(&container->group_lock);
> +
> + driver = container->iommu_driver;
> + if (likely(driver && driver->ops->unpin_pages))
> + ret = driver->ops->unpin_pages(container->iommu_data, user_pfn,
> + pfn, npage);
> + else
> + ret = -EINVAL;
> +
> + up_read(&container->group_lock);
> + vfio_group_try_dissolve_container(group);
> +
> +err_unpin_pages:
> + vfio_group_put(group);
> + return ret;
> +}
> +EXPORT_SYMBOL(vfio_unpin_pages);
> +
> /**
> * Module/class support
> */
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index 2ba19424e4a1..7fb87f008e0a 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -259,8 +259,8 @@ static int vaddr_get_pfn(unsigned long vaddr, int prot, unsigned long *pfn)
> * the iommu can only map chunks of consecutive pfns anyway, so get the
> * first page and all consecutive pages with the same locking.
> */
> -static long vfio_pin_pages(unsigned long vaddr, long npage,
> - int prot, unsigned long *pfn_base)
> +static long __vfio_pin_pages_remote(unsigned long vaddr, long npage,
> + int prot, unsigned long *pfn_base)

nit, what is the additional underscore prefix intended to imply?
Appending _remote is sufficient to avoid the symbol conflict.

> {
> unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
> bool lock_cap = capable(CAP_IPC_LOCK);
> @@ -318,8 +318,8 @@ static long vfio_pin_pages(unsigned long vaddr, long npage,
> return i;
> }
>
> -static long vfio_unpin_pages(unsigned long pfn, long npage,
> - int prot, bool do_accounting)
> +static long __vfio_unpin_pages_remote(unsigned long pfn, long npage,
> + int prot, bool do_accounting)
> {
> unsigned long unlocked = 0;
> long i;
> @@ -382,9 +382,9 @@ static void vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma)
> if (WARN_ON(!unmapped))
> break;
>
> - unlocked += vfio_unpin_pages(phys >> PAGE_SHIFT,
> - unmapped >> PAGE_SHIFT,
> - dma->prot, false);
> + unlocked += __vfio_unpin_pages_remote(phys >> PAGE_SHIFT,
> + unmapped >> PAGE_SHIFT,
> + dma->prot, false);
> iova += unmapped;
>
> cond_resched();
> @@ -613,8 +613,8 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
>
> while (size) {
> /* Pin a contiguous chunk of memory */
> - npage = vfio_pin_pages(vaddr + dma->size,
> - size >> PAGE_SHIFT, prot, &pfn);
> + npage = __vfio_pin_pages_remote(vaddr + dma->size,
> + size >> PAGE_SHIFT, prot, &pfn);
> if (npage <= 0) {
> WARN_ON(!npage);
> ret = (int)npage;
> @@ -624,7 +624,7 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
> /* Map it! */
> ret = vfio_iommu_map(iommu, iova + dma->size, pfn, npage, prot);
> if (ret) {
> - vfio_unpin_pages(pfn, npage, prot, true);
> + __vfio_unpin_pages_remote(pfn, npage, prot, true);
> break;
> }
>
> diff --git a/include/linux/vfio.h b/include/linux/vfio.h
> index 0ecae0b1cd34..ba1b64cb7d4b 100644
> --- a/include/linux/vfio.h
> +++ b/include/linux/vfio.h
> @@ -75,7 +75,13 @@ struct vfio_iommu_driver_ops {
> struct iommu_group *group);
> void (*detach_group)(void *iommu_data,
> struct iommu_group *group);
> -
> + int (*pin_pages)(void *iommu_data, unsigned long *user_pfn,
> + int npage, int prot,
> + unsigned long *phys_pfn);
> + int (*unpin_pages)(void *iommu_data,

Are we changing from long to int here simply because of the absurdity
in passing in more than a 2^31 entry array, that would already consume
more than 16GB itself?

> + unsigned long *user_pfn,
> + unsigned long *pfn,

nit, use phys_pfn so as to match the pin function?

> + int npage);
> };
>
> extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops);
> @@ -127,6 +133,12 @@ static inline long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
> }
> #endif /* CONFIG_EEH */
>
> +extern int vfio_pin_pages(struct device *dev, unsigned long *user_pfn,
> + int npage, int prot, unsigned long *phys_pfn);
> +
> +extern int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn,
> + unsigned long *pfn, int npage);
> +
> /*
> * IRQfd - generic
> */