Re: [PATCH v11 12/13] vfio/pci: Register a DMA fault response region

From: Auger Eric
Date: Thu Feb 18 2021 - 07:19:03 EST


Hi Shameer,

On 2/18/21 11:36 AM, Shameerali Kolothum Thodi wrote:
> Hi Eric,
>
>>> -----Original Message-----
>>> From: Eric Auger [mailto:eric.auger@xxxxxxxxxx]
>>> Sent: 16 November 2020 11:00
>>> To: eric.auger.pro@xxxxxxxxx; eric.auger@xxxxxxxxxx;
>>> iommu@xxxxxxxxxxxxxxxxxxxxxxxxxx; linux-kernel@xxxxxxxxxxxxxxx;
>>> kvm@xxxxxxxxxxxxxxx; kvmarm@xxxxxxxxxxxxxxxxxxxxx; will@xxxxxxxxxx;
>>> joro@xxxxxxxxxx; maz@xxxxxxxxxx; robin.murphy@xxxxxxx;
>>> alex.williamson@xxxxxxxxxx
>>> Cc: jean-philippe@xxxxxxxxxx; zhangfei.gao@xxxxxxxxxx;
>>> zhangfei.gao@xxxxxxxxx; vivek.gautam@xxxxxxx; Shameerali Kolothum
>>> Thodi <shameerali.kolothum.thodi@xxxxxxxxxx>;
>>> jacob.jun.pan@xxxxxxxxxxxxxxx; yi.l.liu@xxxxxxxxx; tn@xxxxxxxxxxxx;
>>> nicoleotsuka@xxxxxxxxx; yuzenghui <yuzenghui@xxxxxxxxxx>
>>> Subject: [PATCH v11 12/13] vfio/pci: Register a DMA fault response
>>> region
>>>
>>> In preparation for vSVA, let's register a DMA fault response region,
>>> where userspace will push the page responses and increment the
>>> head of the buffer. The kernel will pop those responses and inject
>>> them on the IOMMU side.
>>>
>>> Signed-off-by: Eric Auger <eric.auger@xxxxxxxxxx>
>>> ---
>>> drivers/vfio/pci/vfio_pci.c | 114 +++++++++++++++++++++++++---
>>> drivers/vfio/pci/vfio_pci_private.h | 5 ++
>>> drivers/vfio/pci/vfio_pci_rdwr.c | 39 ++++++++++
>>> include/uapi/linux/vfio.h | 32 ++++++++
>>> 4 files changed, 181 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
>>> index 65a83fd0e8c0..e9a904ce3f0d 100644
>>> --- a/drivers/vfio/pci/vfio_pci.c
>>> +++ b/drivers/vfio/pci/vfio_pci.c
>>> @@ -318,9 +318,20 @@ static void vfio_pci_dma_fault_release(struct
>>> vfio_pci_device *vdev,
>>> kfree(vdev->fault_pages);
>>> }
>>>
>>> -static int vfio_pci_dma_fault_mmap(struct vfio_pci_device *vdev,
>>> - struct vfio_pci_region *region,
>>> - struct vm_area_struct *vma)
>>> +static void
>>> +vfio_pci_dma_fault_response_release(struct vfio_pci_device *vdev,
>>> + struct vfio_pci_region *region) {
>>> + if (vdev->dma_fault_response_wq)
>>> + destroy_workqueue(vdev->dma_fault_response_wq);
>>> + kfree(vdev->fault_response_pages);
>>> + vdev->fault_response_pages = NULL;
>>> +}
>>> +
>>> +static int __vfio_pci_dma_fault_mmap(struct vfio_pci_device *vdev,
>>> + struct vfio_pci_region *region,
>>> + struct vm_area_struct *vma,
>>> + u8 *pages)
>>> {
>>> u64 phys_len, req_len, pgoff, req_start;
>>> unsigned long long addr;
>>> @@ -333,14 +344,14 @@ static int vfio_pci_dma_fault_mmap(struct
>>> vfio_pci_device *vdev,
>>> ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
>>> req_start = pgoff << PAGE_SHIFT;
>>>
>>> - /* only the second page of the producer fault region is mmappable */
>>> + /* only the second page of the fault region is mmappable */
>>> if (req_start < PAGE_SIZE)
>>> return -EINVAL;
>>>
>>> if (req_start + req_len > phys_len)
>>> return -EINVAL;
>>>
>>> - addr = virt_to_phys(vdev->fault_pages);
>>> + addr = virt_to_phys(pages);
>>> vma->vm_private_data = vdev;
>>> vma->vm_pgoff = (addr >> PAGE_SHIFT) + pgoff;
>>>
>>> @@ -349,13 +360,29 @@ static int vfio_pci_dma_fault_mmap(struct
>>> vfio_pci_device *vdev,
>>> return ret;
>>> }
>>>
>>> -static int vfio_pci_dma_fault_add_capability(struct vfio_pci_device *vdev,
>>> - struct vfio_pci_region *region,
>>> - struct vfio_info_cap *caps)
>>> +static int vfio_pci_dma_fault_mmap(struct vfio_pci_device *vdev,
>>> + struct vfio_pci_region *region,
>>> + struct vm_area_struct *vma)
>>> +{
>>> + return __vfio_pci_dma_fault_mmap(vdev, region, vma,
>>> vdev->fault_pages);
>>> +}
>>> +
>>> +static int
>>> +vfio_pci_dma_fault_response_mmap(struct vfio_pci_device *vdev,
>>> + struct vfio_pci_region *region,
>>> + struct vm_area_struct *vma)
>>> +{
>>> + return __vfio_pci_dma_fault_mmap(vdev, region, vma,
>>> vdev->fault_response_pages);
>>> +}
>>> +
>>> +static int __vfio_pci_dma_fault_add_capability(struct vfio_pci_device *vdev,
>>> + struct vfio_pci_region *region,
>>> + struct vfio_info_cap *caps,
>>> + u32 cap_id)
>>> {
>>> struct vfio_region_info_cap_sparse_mmap *sparse = NULL;
>>> struct vfio_region_info_cap_fault cap = {
>>> - .header.id = VFIO_REGION_INFO_CAP_DMA_FAULT,
>>> + .header.id = cap_id,
>>> .header.version = 1,
>>> .version = 1,
>>> };
>>> @@ -383,6 +410,14 @@ static int
>>> vfio_pci_dma_fault_add_capability(struct
>>> vfio_pci_device *vdev,
>>> return ret;
>>> }
>>>
>>> +static int vfio_pci_dma_fault_add_capability(struct vfio_pci_device *vdev,
>>> + struct vfio_pci_region *region,
>>> + struct vfio_info_cap *caps) {
>>> + return __vfio_pci_dma_fault_add_capability(vdev, region, caps,
>>> + VFIO_REGION_INFO_CAP_DMA_FAULT); }
>>> +
>>> static const struct vfio_pci_regops vfio_pci_dma_fault_regops = {
>>> .rw = vfio_pci_dma_fault_rw,
>>> .release = vfio_pci_dma_fault_release,
>>> @@ -390,6 +425,13 @@ static const struct vfio_pci_regops
>>> vfio_pci_dma_fault_regops = {
>>> .add_capability = vfio_pci_dma_fault_add_capability,
>>> };
>>>
>>> +static const struct vfio_pci_regops vfio_pci_dma_fault_response_regops = {
>>> + .rw = vfio_pci_dma_fault_response_rw,
>>> + .release = vfio_pci_dma_fault_response_release,
>>> + .mmap = vfio_pci_dma_fault_response_mmap,
>>> + .add_capability = vfio_pci_dma_fault_add_capability,
>
> As I mentioned in the QEMU patch ([RFC v7 26/26] vfio/pci: Implement
> return_page_response page response callback), it looks like we are using the
> VFIO_REGION_INFO_CAP_DMA_FAULT cap id for the dma_fault_response region here
> as well. Is that intentional?
> (I was wondering how it worked in the first place and noticed this.)
Yep, that's a copy-paste error :-(

Thanks

Eric
>
> Please check.
>
> Thanks,
> Shameer
>