[RFC PATCH 3/3] PCI: Expose PCIe Resizable BAR support via sysfs

From: Alex Williamson
Date: Tue Aug 16 2022 - 15:41:15 EST


This proposes a simple sysfs interface to Resizable BAR support,
largely for the purposes of assigning such devices to a VM through
VFIO. Resizable BARs present a difficult feature to expose to a VM
through emulation, as resizing a BAR is done on the host. It can
fail, and often does, but we have no means via emulation of a PCIe
REBAR capability to handle the error cases.

A vfio-pci specific ioctl interface is also cumbersome as there are
often multiple devices within the same bridge aperture and handling
them is a challenge. In the interface proposed here, expanding a
BAR potentially requires such devices to be soft-removed during the
resize operation and rescanned after, in order for all the necessary
resources to be released. A pci-sysfs interface is also more
universal than a vfio specific interface.

Please see the ABI documentation update for usage.

Cc: Christian König <christian.koenig@xxxxxxx>
Signed-off-by: Alex Williamson <alex.williamson@xxxxxxxxxx>
---

NB, I realize the read value of the sysfs attribute provides two values,
the bitmap of possible sizes and the current size. There are a number
of ways to determine the current size, including stat(1) on the
resourceN file, but I found this output to be useful while developing
the interface, and it provides consistency with the store value to the
attribute. Suggestions welcome for better semantics.

Documentation/ABI/testing/sysfs-bus-pci | 27 +++++++
drivers/pci/pci-sysfs.c | 118 +++++++++++++++++++++++++++++++
include/linux/pci.h | 1
3 files changed, 146 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
index 6fc2c2efe8ab..5eea5d89c9f2 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci
+++ b/Documentation/ABI/testing/sysfs-bus-pci
@@ -457,3 +457,30 @@ Description:

The file is writable if the PF is bound to a driver that
implements ->sriov_set_msix_vec_count().
+
+What: /sys/bus/pci/devices/.../resourceN_resize
+Date: August 2022
+Contact: Alex Williamson <alex.williamson@xxxxxxxxxx>
+Description:
+ These files provide an interface to PCIe Resizable BAR support.
+ A file is created for each BAR resource (N) supported by the
+ PCIe Resizable BAR extended capability of the device. Reading
+ each file exposes the capability and current setting for the
+ device, ex.
+
+ # cat resource1_resize
+ 00000000000001c0:6
+
+ The first field provides the supported sizes bitmap, where
+ bit0 = 1MB, bit1 = 2MB, bit2 = 4MB, etc. In the above example
+ the device supports 64MB, 128MB, and 256MB BAR sizes. The
+ second field provides the current setting, the value 6
+ indicates bit6 is set, which corresponds to 64MB.
+
+ When writing the file, only the latter is used, ex.
+
+ # echo 7 > resource1_resize
+
+ This indicates to set the size value corresponding to bit 7,
+ which is 128MB. The resulting size is 2 ^ (bit# + 20). This
+ definition matches the PCIe specification of this capability.
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 9ac92e6a2397..aa59a2de508f 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -1143,6 +1143,7 @@ static void pci_remove_resource_files(struct pci_dev *pdev)

for (i = 0; i < PCI_STD_NUM_BARS; i++) {
struct bin_attribute *res_attr;
+ struct dev_ext_attribute *resize_attr;

res_attr = pdev->res_attr[i];
if (res_attr) {
@@ -1155,6 +1156,13 @@ static void pci_remove_resource_files(struct pci_dev *pdev)
sysfs_remove_bin_file(&pdev->dev.kobj, res_attr);
kfree(res_attr);
}
+
+ resize_attr = pdev->res_attr_resize[i];
+ if (resize_attr) {
+ sysfs_remove_file(&pdev->dev.kobj,
+ &resize_attr->attr.attr);
+ kfree(resize_attr);
+ }
}
}

@@ -1208,6 +1216,108 @@ static int pci_create_attr(struct pci_dev *pdev, int num, int write_combine)
return retval;
}

+/*
+ * sysfs show handler for a resourceN_resize attribute.
+ *
+ * The BAR index is recovered from the var field of the enclosing
+ * dev_ext_attribute.  The output is "<bitmap>:<current>\n", where <bitmap>
+ * is the supported-sizes bitmap printed as 16 hex digits and <current> is
+ * the value returned by pci_rebar_get_current_size() printed in decimal.
+ *
+ * Returns whatever sysfs_emit() returns.
+ */
+static ssize_t pci_bar_resize_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct dev_ext_attribute *ext_attr =
+		container_of(attr, struct dev_ext_attribute, attr);
+	struct pci_dev *pdev = to_pci_dev(dev);
+	int bar = (int)(long)ext_attr->var;
+	u64 possible_sizes;
+	int cur_size;
+	ssize_t len;
+
+	/* Bracket the rebar queries with the config-space PM get/put pair */
+	pci_config_pm_runtime_get(pdev);
+
+	/*
+	 * pci_rebar_get_possible_sizes() currently reports only the sizes
+	 * found in the capability register, hence its u32 return.  The spec
+	 * permits further supported-size bits in the control register, which
+	 * would not fit in 32 bits, so the value is widened to u64 before it
+	 * is shown to userspace for future compatibility.
+	 */
+	possible_sizes = pci_rebar_get_possible_sizes(pdev, bar);
+	cur_size = pci_rebar_get_current_size(pdev, bar);
+
+	len = sysfs_emit(buf, "%016llx:%d\n", possible_sizes, cur_size);
+
+	pci_config_pm_runtime_put(pdev);
+
+	return len;
+}
+
+/*
+ * sysfs store handler for a resourceN_resize attribute.
+ *
+ * The written value is a size encoding: a bit number in the supported-sizes
+ * bitmap, giving a BAR size of 2 ^ (value + 20) bytes.  The BAR index is
+ * recovered from the var field of the enclosing dev_ext_attribute.
+ *
+ * Returns @count on success, -EINVAL if the value cannot be parsed or is
+ * outside the bitmap width, -EBUSY if a driver is bound to the device, or
+ * the error returned by pci_resize_resource().
+ */
+static ssize_t pci_bar_resize_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct dev_ext_attribute *resize_attr =
+		container_of(attr, struct dev_ext_attribute, attr);
+	int ret, i, bar = (int)(long)resize_attr->var;
+	unsigned long size, flags;
+	u16 cmd;
+
+	if (kstrtoul(buf, 0, &size) < 0)
+		return -EINVAL;
+
+	/*
+	 * The value indexes the supported-sizes bitmap, which is exposed to
+	 * userspace as a u64.  Reject anything beyond that width before any
+	 * hardware state is touched, so an oversized value can never be
+	 * narrowed into something that looks like a valid encoding.
+	 * NOTE(review): assumes pci_resize_resource() takes the size as an
+	 * int - confirm against its prototype.
+	 */
+	if (size >= 8 * sizeof(u64))
+		return -EINVAL;
+
+	/* Resizing is refused while a driver is bound to the device */
+	device_lock(dev);
+	if (dev->driver) {
+		ret = -EBUSY;
+		goto unlock;
+	}
+
+	pci_config_pm_runtime_get(pdev);
+
+	/*
+	 * Clear the memory decode enable bit for the duration of the resize;
+	 * the saved command word is written back below.
+	 */
+	pci_read_config_word(pdev, PCI_COMMAND, &cmd);
+	pci_write_config_word(pdev, PCI_COMMAND, cmd & ~PCI_COMMAND_MEMORY);
+
+	flags = pci_resource_flags(pdev, bar);
+
+	/*
+	 * Release every populated BAR of this device whose resource flags
+	 * match those of the target BAR (the target itself included).
+	 * NOTE(review): presumably these share a bridge window that has to be
+	 * freed before it can grow - confirm.
+	 */
+	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+		if (pci_resource_len(pdev, i) &&
+		    pci_resource_flags(pdev, i) == flags)
+			pci_release_resource(pdev, i);
+	}
+
+	ret = pci_resize_resource(pdev, bar, size);
+
+	/* Runs whether or not the resize succeeded */
+	pci_assign_unassigned_bus_resources(pdev->bus);
+
+	pci_write_config_word(pdev, PCI_COMMAND, cmd);
+
+	pci_config_pm_runtime_put(pdev);
+unlock:
+	device_unlock(dev);
+
+	return ret ? ret : count;
+}
+
+/*
+ * Create the resourceN_resize sysfs file for BAR @num of @pdev.
+ *
+ * The attribute structure and the storage for its name come from a single
+ * allocation, with the name placed directly after the structure.  On
+ * success the attribute is recorded in pdev->res_attr_resize[@num].
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails, -EINVAL if the
+ * generated name does not fit the name storage, or the error returned by
+ * sysfs_create_file().
+ */
+static int pci_create_resize_attr(struct pci_dev *pdev, int num)
+{
+	/* Room for "resource<digit>_resize" plus the terminating NUL */
+	const size_t name_len = sizeof("resourceN_resize");
+	struct dev_ext_attribute *resize_attr;
+	char *resize_attr_name;
+	int retval;
+
+	/* NOTE(review): GFP_ATOMIC kept as in the original - confirm it is needed */
+	resize_attr = kzalloc(sizeof(*resize_attr) + name_len, GFP_ATOMIC);
+	if (!resize_attr)
+		return -ENOMEM;
+
+	/* The name lives in the tail of the allocation */
+	resize_attr_name = (char *)(resize_attr + 1);
+
+	sysfs_attr_init(&resize_attr->attr.attr);
+
+	/* Bounded formatting: a multi-digit @num must not overrun the tail */
+	retval = snprintf(resize_attr_name, name_len, "resource%d_resize", num);
+	if (retval < 0 || (size_t)retval >= name_len) {
+		kfree(resize_attr);
+		return -EINVAL;
+	}
+
+	resize_attr->attr.attr.name = resize_attr_name;
+	resize_attr->attr.attr.mode = 0600;
+	resize_attr->attr.show = pci_bar_resize_show;
+	resize_attr->attr.store = pci_bar_resize_store;
+	/* The BAR index travels in var; the show/store handlers read it back */
+	resize_attr->var = (void *)(long)num;
+
+	retval = sysfs_create_file(&pdev->dev.kobj, &resize_attr->attr.attr);
+	if (retval) {
+		kfree(resize_attr);
+		return retval;
+	}
+
+	pdev->res_attr_resize[num] = resize_attr;
+	return 0;
+}
+
/**
* pci_create_resource_files - create resource files in sysfs for @dev
* @pdev: dev in question
@@ -1235,6 +1345,14 @@ static int pci_create_resource_files(struct pci_dev *pdev)
pci_remove_resource_files(pdev);
return retval;
}
+
+ if (pci_rebar_get_current_size(pdev, i) >= 0) {
+ retval = pci_create_resize_attr(pdev, i);
+ if (retval) {
+ pci_remove_resource_files(pdev);
+ return retval;
+ }
+ }
}
return 0;
}
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 060af91bafcd..9c4db0c5f215 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -470,6 +470,7 @@ struct pci_dev {
int rom_attr_enabled; /* Display of ROM attribute enabled? */
struct bin_attribute *res_attr[DEVICE_COUNT_RESOURCE]; /* sysfs file for resources */
struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */
+ struct dev_ext_attribute *res_attr_resize[DEVICE_COUNT_RESOURCE]; /* sysfs file for resizing BAR resources */

#ifdef CONFIG_HOTPLUG_PCI_PCIE
unsigned int broken_cmd_compl:1; /* No compl for some cmds */