Re: [PATCH 3/5] vDPA: introduce vDPA bus

From: Michael S. Tsirkin
Date: Fri Jan 17 2020 - 07:13:54 EST


On Thu, Jan 16, 2020 at 08:42:29PM +0800, Jason Wang wrote:
> vDPA device is a device that uses a datapath which complies with the
> virtio specifications with vendor specific control path. vDPA devices
> can be both physically located on the hardware or emulated by
> software. vDPA hardware devices are usually implemented through PCIE
> with the following types:
>
> - PF (Physical Function) - A single Physical Function
> - VF (Virtual Function) - Device that supports single root I/O
> virtualization (SR-IOV). Its Virtual Function (VF) represents a
> virtualized instance of the device that can be assigned to different
> partitions
> - VDEV (Virtual Device) - With technologies such as Intel Scalable
> IOV, a virtual device composed by host OS utilizing one or more
> ADIs.
> - SF (Sub function) - Vendor specific interface to slice the Physical
> Function to multiple sub functions that can be assigned to different
> partitions as virtual devices.
>
> >From a driver's perspective, depends on how and where the DMA
> translation is done, vDPA devices are split into two types:
>
> - Platform specific DMA translation - From the driver's perspective,
> the device can be used on a platform where device access to data in
> memory is limited and/or translated. An example is a PCIE vDPA whose
> DMA request was tagged via a bus (e.g PCIE) specific way. DMA
> translation and protection are done at PCIE bus IOMMU level.
> - Device specific DMA translation - The device implements DMA
> isolation and protection through its own logic. An example is a vDPA
> device which uses on-chip IOMMU.
>
> To hide the differences and complexity of the above types for a vDPA
> device/IOMMU options and in order to present a generic virtio device
> to the upper layer, a device agnostic framework is required.
>
> This patch introduces a software vDPA bus which abstracts the
> common attributes of vDPA device, vDPA bus driver and the
> communication method (vdpa_config_ops) between the vDPA device
> abstraction and the vDPA bus driver:
>
> With the abstraction of vDPA bus and vDPA bus operations, the
> difference and complexity of the under layer hardware is hidden from
> upper layer. The vDPA bus drivers on top can use a unified
> vdpa_config_ops to control different types of vDPA device.
>
> Signed-off-by: Jason Wang <jasowang@xxxxxxxxxx>
> ---
> MAINTAINERS | 1 +
> drivers/virtio/Kconfig | 2 +
> drivers/virtio/Makefile | 1 +
> drivers/virtio/vdpa/Kconfig | 9 ++
> drivers/virtio/vdpa/Makefile | 2 +
> drivers/virtio/vdpa/vdpa.c | 141 ++++++++++++++++++++++++++
> include/linux/vdpa.h | 191 +++++++++++++++++++++++++++++++++++
> 7 files changed, 347 insertions(+)
> create mode 100644 drivers/virtio/vdpa/Kconfig
> create mode 100644 drivers/virtio/vdpa/Makefile
> create mode 100644 drivers/virtio/vdpa/vdpa.c
> create mode 100644 include/linux/vdpa.h
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index d4bda9c900fa..578d2a581e3b 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -17540,6 +17540,7 @@ F: tools/virtio/
> F: drivers/net/virtio_net.c
> F: drivers/block/virtio_blk.c
> F: include/linux/virtio*.h
> +F: include/linux/vdpa.h
> F: include/uapi/linux/virtio_*.h
> F: drivers/crypto/virtio/
> F: mm/balloon_compaction.c
> diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
> index 078615cf2afc..9c4fdb64d9ac 100644
> --- a/drivers/virtio/Kconfig
> +++ b/drivers/virtio/Kconfig
> @@ -96,3 +96,5 @@ config VIRTIO_MMIO_CMDLINE_DEVICES
> If unsure, say 'N'.
>
> endif # VIRTIO_MENU
> +
> +source "drivers/virtio/vdpa/Kconfig"
> diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile
> index 3a2b5c5dcf46..fdf5eacd0d0a 100644
> --- a/drivers/virtio/Makefile
> +++ b/drivers/virtio/Makefile
> @@ -6,3 +6,4 @@ virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o
> virtio_pci-$(CONFIG_VIRTIO_PCI_LEGACY) += virtio_pci_legacy.o
> obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o
> obj-$(CONFIG_VIRTIO_INPUT) += virtio_input.o
> +obj-$(CONFIG_VDPA) += vdpa/
> diff --git a/drivers/virtio/vdpa/Kconfig b/drivers/virtio/vdpa/Kconfig
> new file mode 100644
> index 000000000000..3032727b4d98
> --- /dev/null
> +++ b/drivers/virtio/vdpa/Kconfig
> @@ -0,0 +1,9 @@
> +# SPDX-License-Identifier: GPL-2.0-only
> +config VDPA
> + tristate
> + default n
> + help
> + Enable this module to support vDPA device that uses a
> + datapath which complies with virtio specifications with
> + vendor specific control path.
> +
> diff --git a/drivers/virtio/vdpa/Makefile b/drivers/virtio/vdpa/Makefile
> new file mode 100644
> index 000000000000..ee6a35e8a4fb
> --- /dev/null
> +++ b/drivers/virtio/vdpa/Makefile
> @@ -0,0 +1,2 @@
> +# SPDX-License-Identifier: GPL-2.0
> +obj-$(CONFIG_VDPA) += vdpa.o
> diff --git a/drivers/virtio/vdpa/vdpa.c b/drivers/virtio/vdpa/vdpa.c
> new file mode 100644
> index 000000000000..2b0e4a9f105d
> --- /dev/null
> +++ b/drivers/virtio/vdpa/vdpa.c
> @@ -0,0 +1,141 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * vDPA bus.
> + *
> + * Copyright (c) 2019, Red Hat. All rights reserved.
> + * Author: Jason Wang <jasowang@xxxxxxxxxx>
> + *
> + */
> +
> +#include <linux/module.h>
> +#include <linux/idr.h>
> +#include <linux/vdpa.h>
> +
> +#define MOD_VERSION "0.1"
> +#define MOD_DESC "vDPA bus"
> +#define MOD_AUTHOR "Jason Wang <jasowang@xxxxxxxxxx>"
> +#define MOD_LICENSE "GPL v2"
> +
> +static DEFINE_IDA(vdpa_index_ida);
> +
> +struct device *vdpa_get_parent(struct vdpa_device *vdpa)
> +{
> + return vdpa->dev.parent;
> +}
> +EXPORT_SYMBOL(vdpa_get_parent);
> +
> +void vdpa_set_parent(struct vdpa_device *vdpa, struct device *parent)
> +{
> + vdpa->dev.parent = parent;
> +}
> +EXPORT_SYMBOL(vdpa_set_parent);
> +
> +struct vdpa_device *dev_to_vdpa(struct device *_dev)
> +{
> + return container_of(_dev, struct vdpa_device, dev);
> +}
> +EXPORT_SYMBOL_GPL(dev_to_vdpa);
> +
> +struct device *vdpa_to_dev(struct vdpa_device *vdpa)
> +{
> + return &vdpa->dev;
> +}
> +EXPORT_SYMBOL_GPL(vdpa_to_dev);
> +
> +static int vdpa_dev_probe(struct device *d)
> +{
> + struct vdpa_device *dev = dev_to_vdpa(d);
> + struct vdpa_driver *drv = drv_to_vdpa(dev->dev.driver);
> + int ret = 0;
> +
> + if (drv && drv->probe)
> + ret = drv->probe(d);
> +
> + return ret;
> +}
> +
> +static int vdpa_dev_remove(struct device *d)
> +{
> + struct vdpa_device *dev = dev_to_vdpa(d);
> + struct vdpa_driver *drv = drv_to_vdpa(dev->dev.driver);
> +
> + if (drv && drv->remove)
> + drv->remove(d);
> +
> + return 0;
> +}
> +
> +static struct bus_type vdpa_bus = {
> + .name = "vdpa",
> + .probe = vdpa_dev_probe,
> + .remove = vdpa_dev_remove,
> +};
> +
> +int register_vdpa_device(struct vdpa_device *vdpa)
> +{
> + int err;
> +
> + if (!vdpa_get_parent(vdpa))
> + return -EINVAL;
> +
> + if (!vdpa->config)
> + return -EINVAL;
> +
> + err = ida_simple_get(&vdpa_index_ida, 0, 0, GFP_KERNEL);
> + if (err < 0)
> + return -EFAULT;
> +
> + vdpa->dev.bus = &vdpa_bus;
> + device_initialize(&vdpa->dev);
> +
> + vdpa->index = err;
> + dev_set_name(&vdpa->dev, "vdpa%u", vdpa->index);
> +
> + err = device_add(&vdpa->dev);
> + if (err)
> + ida_simple_remove(&vdpa_index_ida, vdpa->index);
> +
> + return err;
> +}
> +EXPORT_SYMBOL_GPL(register_vdpa_device);
> +
> +void unregister_vdpa_device(struct vdpa_device *vdpa)
> +{
> + int index = vdpa->index;
> +
> + device_unregister(&vdpa->dev);
> + ida_simple_remove(&vdpa_index_ida, index);
> +}
> +EXPORT_SYMBOL_GPL(unregister_vdpa_device);
> +
> +int register_vdpa_driver(struct vdpa_driver *driver)
> +{
> + driver->drv.bus = &vdpa_bus;
> + return driver_register(&driver->drv);
> +}
> +EXPORT_SYMBOL_GPL(register_vdpa_driver);
> +
> +void unregister_vdpa_driver(struct vdpa_driver *driver)
> +{
> + driver_unregister(&driver->drv);
> +}
> +EXPORT_SYMBOL_GPL(unregister_vdpa_driver);
> +
> +static int vdpa_init(void)
> +{
> + if (bus_register(&vdpa_bus) != 0)
> + panic("virtio bus registration failed");
> + return 0;
> +}
> +
> +static void __exit vdpa_exit(void)
> +{
> + bus_unregister(&vdpa_bus);
> + ida_destroy(&vdpa_index_ida);
> +}
> +core_initcall(vdpa_init);
> +module_exit(vdpa_exit);
> +
> +MODULE_VERSION(MOD_VERSION);
> +MODULE_AUTHOR(MOD_AUTHOR);
> +MODULE_LICENSE(MOD_LICENSE);
> diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
> new file mode 100644
> index 000000000000..47760137ef66
> --- /dev/null
> +++ b/include/linux/vdpa.h
> @@ -0,0 +1,191 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _LINUX_VDPA_H
> +#define _LINUX_VDPA_H
> +
> +#include <linux/device.h>
> +#include <linux/interrupt.h>
> +#include <linux/vhost_iotlb.h>
> +
> +/**
> + * vDPA callback definition.
> + * @callback: interrupt callback function
> + * @private: the data passed to the callback function
> + */
> +struct vdpa_callback {
> + irqreturn_t (*callback)(void *data);
> + void *private;
> +};
> +
> +/**
> + * vDPA device - representation of a vDPA device
> + * @dev: underlying device
> + * @config: the configuration ops for this device.
> + * @index: device index
> + */
> +struct vdpa_device {
> + struct device dev;
> + const struct vdpa_config_ops *config;
> + int index;
> +};
> +
> +/**
> + * vDPA_config_ops - operations for configuring a vDPA device.
> + * Note: vDPA device drivers are required to implement all of the
> + * operations unless it is optional mentioned in the following list.
> + * @set_vq_address: Set the address of virtqueue
> + * @vdev: vdpa device
> + * @idx: virtqueue index
> + * @desc_area: address of desc area
> + * @driver_area: address of driver area
> + * @device_area: address of device area
> + * Returns integer: success (0) or error (< 0)
> + * @set_vq_num: Set the size of virtqueue
> + * @vdev: vdpa device
> + * @idx: virtqueue index
> + * @num: the size of virtqueue
> + * @kick_vq: Kick the virtqueue
> + * @vdev: vdpa device
> + * @idx: virtqueue index


This seems wrong: kicks are data path so drivers should not
do it in a vendor specific way. How about an API
returning the device/resource that can then be
mapped as appropriate?


> + * @set_vq_cb: Set the interrupt callback function for
> + * a virtqueue
> + * @vdev: vdpa device
> + * @idx: virtqueue index
> + * @cb: virtio-vdev interrupt callback structure


Calls are data path too, I think we need some way to map MSI?

> + * @set_vq_ready: Set ready status for a virtqueue
> + * @vdev: vdpa device
> + * @idx: virtqueue index
> + * @ready: ready (true) not ready(false)
> + * @get_vq_ready: Get ready status for a virtqueue
> + * @vdev: vdpa device
> + * @idx: virtqueue index
> + * Returns boolean: ready (true) or not (false)
> + * @set_vq_state: Set the state for a virtqueue
> + * @vdev: vdpa device
> + * @idx: virtqueue index
> + * @state: virtqueue state (last_avail_idx)
> + * Returns integer: success (0) or error (< 0)
> + * @get_vq_state: Get the state for a virtqueue
> + * @vdev: vdpa device
> + * @idx: virtqueue index
> + * Returns virtqueue state (last_avail_idx)
> + * @get_vq_align: Get the virtqueue align requirement
> + * for the device
> + * @vdev: vdpa device
> + * Returns virtqueue algin requirement


Where does this come from? Spec dictates that for a data path,
vendor specific values for this will break userspace ...

> + * @get_features: Get virtio features supported by the device
> + * @vdev: vdpa device
> + * Returns the virtio features support by the
> + * device
> + * @set_features: Set virtio features supported by the driver
> + * @vdev: vdpa device
> + * @features: feature support by the driver
> + * Returns integer: success (0) or error (< 0)
> + * @set_config_cb: Set the config interrupt callback
> + * @vdev: vdpa device
> + * @cb: virtio-vdev interrupt callback structure
> + * @get_vq_num_max: Get the max size of virtqueue
> + * @vdev: vdpa device
> + * Returns u16: max size of virtqueue


I'm not sure this has to be uniform across VQs.

> + * @get_device_id: Get virtio device id
> + * @vdev: vdpa device
> + * Returns u32: virtio device id


is this the virtio ID? PCI ID?

> + * @get_vendor_id: Get id for the vendor that provides this device
> + * @vdev: vdpa device
> + * Returns u32: virtio vendor id

what's the idea behind this? userspace normally doesn't interact with
this ... debugging?

> + * @get_status: Get the device status
> + * @vdev: vdpa device
> + * Returns u8: virtio device status
> + * @set_status: Set the device status
> + * @vdev: vdpa device
> + * @status: virtio device status
> + * @get_config: Read from device specific configuration space
> + * @vdev: vdpa device
> + * @offset: offset from the beginning of
> + * configuration space
> + * @buf: buffer used to read to
> + * @len: the length to read from
> + * configuration space
> + * @set_config: Write to device specific configuration space
> + * @vdev: vdpa device
> + * @offset: offset from the beginning of
> + * configuration space
> + * @buf: buffer used to write from
> + * @len: the length to write to
> + * configuration space
> + * @get_generation: Get device config generation (optional)
> + * @vdev: vdpa device
> + * Returns u32: device generation
> + * @set_map: Set device memory mapping, optional
> + * and only needed for device that using
> + * device specific DMA translation
> + * (on-chip IOMMU)
> + * @vdev: vdpa device
> + * @iotlb: vhost memory mapping to be
> + * used by the vDPA
> + * Returns integer: success (0) or error (< 0)

OK so any change just swaps in a completely new mapping?
Wouldn't this make minor changes such as memory hotplug
quite expensive?

> + */
> +struct vdpa_config_ops {
> + /* Virtqueue ops */
> + int (*set_vq_address)(struct vdpa_device *vdev,
> + u16 idx, u64 desc_area, u64 driver_area,
> + u64 device_area);
> + void (*set_vq_num)(struct vdpa_device *vdev, u16 idx, u32 num);
> + void (*kick_vq)(struct vdpa_device *vdev, u16 idx);
> + void (*set_vq_cb)(struct vdpa_device *vdev, u16 idx,
> + struct vdpa_callback *cb);
> + void (*set_vq_ready)(struct vdpa_device *vdev, u16 idx, bool ready);
> + bool (*get_vq_ready)(struct vdpa_device *vdev, u16 idx);
> + int (*set_vq_state)(struct vdpa_device *vdev, u16 idx, u64 state);
> + u64 (*get_vq_state)(struct vdpa_device *vdev, u16 idx);
> +
> + /* Device ops */
> + u16 (*get_vq_align)(struct vdpa_device *vdev);
> + u64 (*get_features)(struct vdpa_device *vdev);
> + int (*set_features)(struct vdpa_device *vdev, u64 features);
> + void (*set_config_cb)(struct vdpa_device *vdev,
> + struct vdpa_callback *cb);
> + u16 (*get_vq_num_max)(struct vdpa_device *vdev);
> + u32 (*get_device_id)(struct vdpa_device *vdev);
> + u32 (*get_vendor_id)(struct vdpa_device *vdev);
> + u8 (*get_status)(struct vdpa_device *vdev);
> + void (*set_status)(struct vdpa_device *vdev, u8 status);
> + void (*get_config)(struct vdpa_device *vdev, unsigned int offset,
> + void *buf, unsigned int len);
> + void (*set_config)(struct vdpa_device *vdev, unsigned int offset,
> + const void *buf, unsigned int len);
> + u32 (*get_generation)(struct vdpa_device *vdev);
> +
> + /* Mem table */
> + int (*set_map)(struct vdpa_device *vdev, struct vhost_iotlb *iotlb);
> +};
> +
> +int register_vdpa_device(struct vdpa_device *vdpa);
> +void unregister_vdpa_device(struct vdpa_device *vdpa);
> +
> +struct device *vdpa_get_parent(struct vdpa_device *vdpa);
> +void vdpa_set_parent(struct vdpa_device *vdpa, struct device *parent);
> +
> +struct vdpa_device *dev_to_vdpa(struct device *_dev);
> +struct device *vdpa_to_dev(struct vdpa_device *vdpa);
> +
> +/**
> + * vdpa_driver - operations for a vDPA driver
> + * @driver: underlying device driver
> + * @probe: the function to call when a device is found. Returns 0 or -errno.
> + * @remove: the function to call when a device is removed.
> + */
> +struct vdpa_driver {
> + struct device_driver drv;
> + int (*probe)(struct device *dev);
> + void (*remove)(struct device *dev);
> +};
> +
> +int register_vdpa_driver(struct vdpa_driver *drv);
> +void unregister_vdpa_driver(struct vdpa_driver *drv);
> +
> +static inline struct vdpa_driver *drv_to_vdpa(struct device_driver *drv)
> +{
> + return container_of(drv, struct vdpa_driver, drv);
> +}
> +
> +#endif /* _LINUX_VDPA_H */
> --
> 2.19.1