[PATCH 4/4] vfio-pci: Best-effort huge pfnmaps with !MAP_FIXED mappings
From: Peter Xu
Date: Fri May 30 2025 - 13:21:20 EST
This patch enables best-effort mmap() for vfio-pci bars even without
MAP_FIXED, so as to utilize huge pfnmaps as much as possible. It should
also avoid userspace changes (switching to MAP_FIXED with pre-aligned VA
addresses) to start enabling huge pfnmaps on VFIO bars.
Here the trick is making sure the MMIO PFNs will be aligned with the VAs
allocated from mmap() when !MAP_FIXED, so that whatever is returned from
mmap(!MAP_FIXED) of vfio-pci MMIO regions will be automatically suitable
for huge pfnmaps as much as possible.
To achieve that, a custom vfio_device get_unmapped_area() hook is needed
for vfio-pci devices.
Note, MMIO physical addresses should normally be guaranteed to be always
bar-size aligned, hence the bar offset can logically be directly used to do
the calculation. However, to make it strict and clear (rather than relying
on spec details), we still try to fetch the bar's physical address from
pci_dev.resource[].
[1] https://lore.kernel.org/linux-pci/20250529214414.1508155-1-amastro@xxxxxx/
Reported-by: Alex Mastro <amastro@xxxxxx>
Signed-off-by: Peter Xu <peterx@xxxxxxxxxx>
---
drivers/vfio/pci/vfio_pci.c | 1 +
drivers/vfio/pci/vfio_pci_core.c | 34 ++++++++++++++++++++++++++++++++
include/linux/vfio_pci_core.h | 3 +++
3 files changed, 38 insertions(+)
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 5ba39f7623bb..32b570f17d0f 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -144,6 +144,7 @@ static const struct vfio_device_ops vfio_pci_ops = {
.detach_ioas = vfio_iommufd_physical_detach_ioas,
.pasid_attach_ioas = vfio_iommufd_physical_pasid_attach_ioas,
.pasid_detach_ioas = vfio_iommufd_physical_pasid_detach_ioas,
+ .get_unmapped_area = vfio_pci_core_get_unmapped_area,
};
static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 6328c3a05bcd..5392bec4929a 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -1641,6 +1641,40 @@ static unsigned long vma_to_pfn(struct vm_area_struct *vma)
return (pci_resource_start(vdev->pdev, index) >> PAGE_SHIFT) + pgoff;
}
+/*
+ * Hint function to provide an mmap() virtual address candidate so that
+ * huge pfnmaps can be used as much as possible. It is done by aligning
+ * the VA to the PFN to be mapped in the specific bar.
+ *
+ * Note that this function only performs the minimal checks on mmap()
+ * parameters needed to make the PFN calculation valid. The majority of
+ * the mmap() sanity checks will be done later in mmap().
+ */
+unsigned long vfio_pci_core_get_unmapped_area(struct vfio_device *device,
+ struct file *file, unsigned long addr, unsigned long len,
+ unsigned long pgoff, unsigned long flags)
+{
+ struct vfio_pci_core_device *vdev =
+ container_of(device, struct vfio_pci_core_device, vdev);
+ struct pci_dev *pdev = vdev->pdev;
+ unsigned int index = pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
+ unsigned long req_start;
+
+ /* Currently, only bars 0-5 support huge pfnmap */
+ if (index >= VFIO_PCI_ROM_REGION_INDEX)
+ return 0;
+
+ /* Offset within the region plus the bar's physical start address */
+ req_start = (pgoff << PAGE_SHIFT) & ((1UL << VFIO_PCI_OFFSET_SHIFT) - 1);
+ if (check_add_overflow(req_start, pci_resource_start(pdev, index),
+ &req_start))
+ return 0;
+
+ return huge_mapping_get_va_aligned(file, addr, len, req_start >> PAGE_SHIFT,
+ flags);
+}
+EXPORT_SYMBOL_GPL(vfio_pci_core_get_unmapped_area);
+
static vm_fault_t vfio_pci_mmap_huge_fault(struct vm_fault *vmf,
unsigned int order)
{
diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index fbb472dd99b3..d97c920b4dbf 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -119,6 +119,9 @@ ssize_t vfio_pci_core_read(struct vfio_device *core_vdev, char __user *buf,
size_t count, loff_t *ppos);
ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *buf,
size_t count, loff_t *ppos);
+unsigned long vfio_pci_core_get_unmapped_area(struct vfio_device *device,
+ struct file *file, unsigned long addr, unsigned long len,
+ unsigned long pgoff, unsigned long flags);
int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma);
void vfio_pci_core_request(struct vfio_device *core_vdev, unsigned int count);
int vfio_pci_core_match(struct vfio_device *core_vdev, char *buf);
--
2.49.0
--
Peter Xu