Re: [PATCH 1/9] virtio: add functions for piecewise addition of buffers

From: Michael S. Tsirkin
Date: Tue Feb 12 2013 - 09:52:03 EST


On Tue, Feb 12, 2013 at 01:23:27PM +0100, Paolo Bonzini wrote:
> virtio device drivers translate requests from higher layers in two steps:
> a device-specific step in the device driver, and generic preparation of
> virtio direct or indirect buffers in virtqueue_add_buf. Because
> virtqueue_add_buf accepts the outcome of the first step only as a single
> array of struct scatterlist, drivers may need to put additional items at
> the front or back of the data scatterlists before handing them to
> virtqueue_add_buf. Because of this, virtio-scsi has to copy each request
> into a scatterlist internal to the driver. It cannot just use the one
> that was prepared by the upper SCSI layers.
>
> On top of this, virtqueue_add_buf also has the limitation of not
> supporting chained scatterlists: the buffers must be provided as an
> array of struct scatterlist. Chained scatterlists, though not supported
> on all architectures, would help virtio-scsi, where all additional
> items are placed at the front.
>
> This patch adds a different set of APIs for adding a buffer to a virtqueue.
> The new API lets you pass the buffers piecewise, wrapping multiple calls
> to virtqueue_add_sg between virtqueue_start_buf and virtqueue_end_buf.
> virtio-scsi can then call virtqueue_add_sg three or four times: for the
> request header, for the write buffer (if present), for the response
> header, and finally for the read buffer (again if present). This saves
> the copying and the related locking.
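
Just to check I follow the intended call sequence, a virtio-scsi write
would look something like this (sketch only, error handling trimmed;
cmd, out_sgl and out_nents are placeholders, not names from this patch):

	struct scatterlist req_sg, resp_sg;
	int err;

	sg_init_one(&req_sg, &cmd->req, sizeof(cmd->req));
	sg_init_one(&resp_sg, &cmd->resp, sizeof(cmd->resp));

	/* 1 header + out_nents data entries + 1 response, in 3 sg lists. */
	err = virtqueue_start_buf(vq, cmd, 2 + out_nents, 3, GFP_ATOMIC);
	if (err)
		return err;	/* e.g. -ENOSPC when the ring is full */

	virtqueue_add_sg(vq, &req_sg, 1, DMA_TO_DEVICE);
	virtqueue_add_sg(vq, out_sgl, out_nents, DMA_TO_DEVICE);
	virtqueue_add_sg(vq, &resp_sg, 1, DMA_FROM_DEVICE);
	virtqueue_end_buf(vq);
	virtqueue_kick(vq);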
>
> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> ---
> drivers/virtio/virtio_ring.c | 211 ++++++++++++++++++++++++++++++++++++++++++
> include/linux/virtio.h | 14 +++
> 2 files changed, 225 insertions(+), 0 deletions(-)
>
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index ffd7e7d..64184e5 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -101,6 +101,10 @@ struct vring_virtqueue
> /* Last used index we've seen. */
> u16 last_used_idx;
>
> + /* State between virtqueue_start_buf and virtqueue_end_buf. */
> + int head;
> + struct vring_desc *indirect_base, *tail;
> +
> /* How to notify other side. FIXME: commonalize hcalls! */
> void (*notify)(struct virtqueue *vq);
>
> @@ -394,6 +398,213 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
> vq->vq.num_free++;
> }
>
> +/**
> + * virtqueue_start_buf - start building buffer for the other end
> + * @vq: the struct virtqueue we're talking about.
> + * @data: the token identifying the buffer.
> + * @nents: the number of buffers that will be added

This function starts building a single buffer, so describing @nents as
"the number of buffers" is a bit weird here.

> + * @nsg: the number of sg lists that will be added

So this means the number of calls to add_sg?  Not sure why that matters.
How about we pass in_num/out_num instead - that is, the total # of sg
entries, same as add_buf?
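
E.g. for a virtio-scsi write with N data segments, if I read this right:

	nents = 1 + N + 1;	/* request header + data + response */
	nsg   = 3;		/* i.e. three virtqueue_add_sg calls */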


> + * @gfp: how to do memory allocations (if necessary).
> + *
> + * Caller must ensure we don't call this with other virtqueue operations
> + * at the same time (except where noted), and that a successful call is
> + * followed by one or more calls to virtqueue_add_sg, and finally a call
> + * to virtqueue_end_buf.
> + *
> + * Returns zero or a negative error (ie. ENOSPC).
> + */
> +int virtqueue_start_buf(struct virtqueue *_vq,
> + void *data,
> + unsigned int nents,
> + unsigned int nsg,
> + gfp_t gfp)
> +{
> + struct vring_virtqueue *vq = to_vvq(_vq);
> + struct vring_desc *desc = NULL;
> + int head;
> + int ret = -ENOMEM;
> +
> + START_USE(vq);
> +
> + BUG_ON(data == NULL);
> +
> +#ifdef DEBUG
> + {
> + ktime_t now = ktime_get();
> +
> + /* No kick or get, with .1 second between? Warn. */
> + if (vq->last_add_time_valid)
> + WARN_ON(ktime_to_ms(ktime_sub(now, vq->last_add_time))
> + > 100);
> + vq->last_add_time = now;
> + vq->last_add_time_valid = true;
> + }
> +#endif
> +
> + BUG_ON(nents < nsg);
> + BUG_ON(nsg == 0);
> +
> + /*
> + * If the host supports indirect descriptor tables, and there is
> + * no space for direct buffers or there are multi-item scatterlists,
> + * go indirect.
> + */
> + head = vq->free_head;
> + if (vq->indirect && (nents > nsg || vq->vq.num_free < nents)) {
> + if (vq->vq.num_free == 0)
> + goto no_space;
> +
> + desc = kmalloc(nents * sizeof(struct vring_desc), gfp);
> + if (!desc)
> + goto error;
> +
> + /* We're about to use a buffer */
> + vq->vq.num_free--;
> +
> + /* Use a single buffer which doesn't continue */
> + vq->vring.desc[head].flags = VRING_DESC_F_INDIRECT;
> + vq->vring.desc[head].addr = virt_to_phys(desc);
> + vq->vring.desc[head].len = nents * sizeof(struct vring_desc);
> +
> + /* Update free pointer */
> + vq->free_head = vq->vring.desc[head].next;
> + }
> +
> + /* Set token. */
> + vq->data[head] = data;
> +
> + pr_debug("Started buffer head %i for %p\n", head, vq);
> +
> + vq->indirect_base = desc;
> + vq->tail = NULL;
> + vq->head = head;
> + return 0;
> +
> +no_space:
> + ret = -ENOSPC;
> +error:
> + pr_debug("Can't add buf (%d) - nents = %i, avail = %i\n",
> + ret, nents, vq->vq.num_free);
> + END_USE(vq);
> + return ret;
> +}
> +EXPORT_SYMBOL_GPL(virtqueue_start_buf);
> +
> +/**
> + * virtqueue_add_sg - add sglist to buffer being built
> + * @_vq: the virtqueue for which the buffer is being built
> + * @sgl: the description of the buffer(s).
> + * @nents: the number of items to process in sgl
> + * @dir: whether the sgl is read or written (DMA_TO_DEVICE/DMA_FROM_DEVICE only)
> + *
> + * Note that, unlike virtqueue_add_buf, this function follows chained
> + * scatterlists, and stops before the @nents-th item if a scatterlist item
> + * has a marker.
> + *
> + * Caller must ensure we don't call this with other virtqueue operations
> + * at the same time (except where noted).

Hmm, so if a buffer has both out and in sections, you need a separate
add_sg call for each?  in_num/out_num would be nicer?


> + */
> +void virtqueue_add_sg(struct virtqueue *_vq,
> + struct scatterlist sgl[],
> + unsigned int nents,
> + enum dma_data_direction dir)
> +{
> + struct vring_virtqueue *vq = to_vvq(_vq);
> + unsigned int i, n;
> + struct scatterlist *sg;
> + struct vring_desc *tail;
> + u32 flags;
> +
> +#ifdef DEBUG
> + BUG_ON(!vq->in_use);
> +#endif
> +
> + BUG_ON(dir != DMA_FROM_DEVICE && dir != DMA_TO_DEVICE);
> + BUG_ON(nents == 0);
> +
> + flags = dir == DMA_FROM_DEVICE ? VRING_DESC_F_WRITE : 0;
> + flags |= VRING_DESC_F_NEXT;
> +
> + /*
> + * If using indirect descriptor tables, fill in the buffers
> + * at vq->indirect_base.
> + */
> + if (vq->indirect_base) {
> + i = 0;
> + if (likely(vq->tail))
> + i = vq->tail - vq->indirect_base + 1;
> +
> + for_each_sg(sgl, sg, nents, n) {
> + tail = &vq->indirect_base[i];
> + tail->flags = flags;
> + tail->addr = sg_phys(sg);
> + tail->len = sg->length;
> + tail->next = ++i;
> + }
> + } else {
> + BUG_ON(vq->vq.num_free < nents);
> +
> + i = vq->free_head;
> + for_each_sg(sgl, sg, nents, n) {
> + tail = &vq->vring.desc[i];
> + tail->flags = flags;
> + tail->addr = sg_phys(sg);
> + tail->len = sg->length;
> + i = tail->next;
> + vq->vq.num_free--;
> + }
> +
> + vq->free_head = i;
> + }
> + vq->tail = tail;
> +}
> +EXPORT_SYMBOL_GPL(virtqueue_add_sg);
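
FWIW, with chaining, the virtio-scsi "extra items at the front" case from
the commit log would look something like this (sketch only; hdr and cmd
are illustrative, scsi_sglist/scsi_sg_count are the usual SCSI midlayer
accessors):

	/* Two driver-owned entries: the header plus one chain link. */
	struct scatterlist hdr[2];

	sg_init_table(hdr, 2);
	sg_set_buf(&hdr[0], &cmd->req, sizeof(cmd->req));
	sg_chain(hdr, 2, scsi_sglist(sc));	/* hdr[1] becomes the link */

	/* for_each_sg follows the chain across both tables. */
	virtqueue_add_sg(vq, hdr, 1 + scsi_sg_count(sc), DMA_TO_DEVICE);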
> +
> +/**
> + * virtqueue_end_buf - expose buffer to other end
> + * @_vq: the virtqueue for which the buffer was built
> + *
> + * Caller must ensure we don't call this with other virtqueue operations
> + * at the same time (except where noted).
> + */
> +void virtqueue_end_buf(struct virtqueue *_vq)
> +{
> + struct vring_virtqueue *vq = to_vvq(_vq);
> + unsigned int avail;
> + int head = vq->head;
> + struct vring_desc *tail = vq->tail;
> +
> +#ifdef DEBUG
> + BUG_ON(!vq->in_use);
> +#endif
> + BUG_ON(tail == NULL);
> +
> + /* The last one does not have the next flag set. */
> + tail->flags &= ~VRING_DESC_F_NEXT;
> +
> + /*
> + * Put entry in available array. Descriptors and available array
> + * need to be set before we expose the new available array entries.
> + */
> + avail = vq->vring.avail->idx & (vq->vring.num-1);
> + vq->vring.avail->ring[avail] = head;
> + virtio_wmb(vq);
> +
> + vq->vring.avail->idx++;
> + vq->num_added++;
> +
> + /*
> + * This is very unlikely, but theoretically possible. Kick
> + * just in case.
> + */
> + if (unlikely(vq->num_added == (1 << 16) - 1))
> + virtqueue_kick(&vq->vq);
> +
> + pr_debug("Added buffer head %i to %p\n", head, vq);
> + END_USE(vq);
> +}
> +EXPORT_SYMBOL_GPL(virtqueue_end_buf);
> +
> static inline bool more_used(const struct vring_virtqueue *vq)
> {
> return vq->last_used_idx != vq->vring.used->idx;
> diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> index cf8adb1..43d6bc3 100644
> --- a/include/linux/virtio.h
> +++ b/include/linux/virtio.h
> @@ -7,6 +7,7 @@
> #include <linux/spinlock.h>
> #include <linux/device.h>
> #include <linux/mod_devicetable.h>
> +#include <linux/dma-direction.h>
> #include <linux/gfp.h>
>
> /**
> @@ -40,6 +41,19 @@ int virtqueue_add_buf(struct virtqueue *vq,
> void *data,
> gfp_t gfp);
>
> +int virtqueue_start_buf(struct virtqueue *_vq,
> + void *data,
> + unsigned int nents,
> + unsigned int nsg,
> + gfp_t gfp);
> +
> +void virtqueue_add_sg(struct virtqueue *_vq,
> + struct scatterlist sgl[],
> + unsigned int nents,
> + enum dma_data_direction dir);
> +
> +void virtqueue_end_buf(struct virtqueue *_vq);
> +
> void virtqueue_kick(struct virtqueue *vq);
>
> bool virtqueue_kick_prepare(struct virtqueue *vq);
> --
> 1.7.1
>