Re: [Qemu-devel] [PATCH v7 2/6] virtio-pmem: Add virtio pmem driver

From: Jakub Staroń
Date: Tue May 07 2019 - 16:26:54 EST


On 4/25/19 10:00 PM, Pankaj Gupta wrote:

> +void host_ack(struct virtqueue *vq)
> +{
> +	unsigned int len;
> +	unsigned long flags;
> +	struct virtio_pmem_request *req, *req_buf;
> +	struct virtio_pmem *vpmem = vq->vdev->priv;
> +
> +	spin_lock_irqsave(&vpmem->pmem_lock, flags);
> +	while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
> +		req->done = true;
> +		wake_up(&req->host_acked);
> +
> +		if (!list_empty(&vpmem->req_list)) {
> +			req_buf = list_first_entry(&vpmem->req_list,
> +					struct virtio_pmem_request, list);
> +			list_del(&vpmem->req_list);

Shouldn't it rather be `list_del(vpmem->req_list.next)`? We are trying to unlink the
first element of the list, and `vpmem->req_list` is just the list head.
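
For illustration, since the entry has already been fetched with `list_first_entry`,
an equivalent fix (just a sketch, not taken from the patch) would be to delete the
entry's own list_head:

	req_buf = list_first_entry(&vpmem->req_list,
			struct virtio_pmem_request, list);
	/* unlink the first entry itself, not the list head */
	list_del(&req_buf->list);

Here `&req_buf->list` and `vpmem->req_list.next` point to the same node, so both
forms remove the first element, while `list_del(&vpmem->req_list)` would unlink
and poison the list head itself.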

> +int virtio_pmem_flush(struct nd_region *nd_region)
> +{
> +	int err;
> +	unsigned long flags;
> +	struct scatterlist *sgs[2], sg, ret;
> +	struct virtio_device *vdev = nd_region->provider_data;
> +	struct virtio_pmem *vpmem = vdev->priv;
> +	struct virtio_pmem_request *req;
> +
> +	might_sleep();
> +	req = kmalloc(sizeof(*req), GFP_KERNEL);
> +	if (!req)
> +		return -ENOMEM;
> +
> +	req->done = req->wq_buf_avail = false;
> +	strcpy(req->name, "FLUSH");
> +	init_waitqueue_head(&req->host_acked);
> +	init_waitqueue_head(&req->wq_buf);
> +	sg_init_one(&sg, req->name, strlen(req->name));
> +	sgs[0] = &sg;
> +	sg_init_one(&ret, &req->ret, sizeof(req->ret));
> +	sgs[1] = &ret;
> +
> +	spin_lock_irqsave(&vpmem->pmem_lock, flags);
> +	err = virtqueue_add_sgs(vpmem->req_vq, sgs, 1, 1, req, GFP_ATOMIC);
> +	if (err) {
> +		dev_err(&vdev->dev, "failed to send command to virtio pmem device\n");
> +
> +		list_add_tail(&vpmem->req_list, &req->list);
> +		spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
> +
> +		/* When host has read buffer, this completes via host_ack */
> +		wait_event(req->wq_buf, req->wq_buf_avail);
> +		spin_lock_irqsave(&vpmem->pmem_lock, flags);
> +	}

Aren't the arguments in `list_add_tail` swapped? The element we are adding should come
first, and the list head second. Also, shouldn't we resubmit the request after waking up
from `wait_event(req->wq_buf, req->wq_buf_avail)`?
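
For reference, the list helper in include/linux/list.h takes the new entry first and
the list head second:

	void list_add_tail(struct list_head *new, struct list_head *head);

so the call here would need to be `list_add_tail(&req->list, &vpmem->req_list)`.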

I propose rewriting it like this:

diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c
index 66b582f751a3..ff0556b04e86 100644
--- a/drivers/nvdimm/virtio_pmem.c
+++ b/drivers/nvdimm/virtio_pmem.c
@@ -25,7 +25,7 @@ void host_ack(struct virtqueue *vq)
 		if (!list_empty(&vpmem->req_list)) {
 			req_buf = list_first_entry(&vpmem->req_list,
 					struct virtio_pmem_request, list);
-			list_del(&vpmem->req_list);
+			list_del(vpmem->req_list.next);
 			req_buf->wq_buf_avail = true;
 			wake_up(&req_buf->wq_buf);
 		}
@@ -59,17 +59,33 @@ int virtio_pmem_flush(struct nd_region *nd_region)
 	sgs[1] = &ret;
 
 	spin_lock_irqsave(&vpmem->pmem_lock, flags);
-	err = virtqueue_add_sgs(vpmem->req_vq, sgs, 1, 1, req, GFP_ATOMIC);
-	if (err) {
-		dev_err(&vdev->dev, "failed to send command to virtio pmem device\n");
+	/*
+	 * If virtqueue_add_sgs returns -ENOSPC then req_vq virtual queue does not
+	 * have free descriptor slots. We add the request to req_list and wait
+	 * for host_ack to wake us up when free slots are available.
+	 */
+	while ((err = virtqueue_add_sgs(vpmem->req_vq, sgs, 1, 1, req, GFP_ATOMIC)) == -ENOSPC) {
+		dev_err(&vdev->dev, "failed to send command to virtio pmem device, no free slots in the virtqueue, postponing request\n");
+		req->wq_buf_avail = false;
 
-		list_add_tail(&vpmem->req_list, &req->list);
+		list_add_tail(&req->list, &vpmem->req_list);
 		spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
 
 		/* When host has read buffer, this completes via host_ack */
 		wait_event(req->wq_buf, req->wq_buf_avail);
 		spin_lock_irqsave(&vpmem->pmem_lock, flags);
 	}
+
+	/*
+	 * virtqueue_add_sgs failed with error different than -ENOSPC, we can't
+	 * do anything about that.
+	 */
+	if (err) {
+		dev_info(&vdev->dev, "failed to send command to virtio pmem device, error code %d\n", err);
+		spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
+		err = -EIO;
+		goto ret;
+	}
 	err = virtqueue_kick(vpmem->req_vq);
 	spin_unlock_irqrestore(&vpmem->pmem_lock, flags);

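With this change, a flush request that cannot be queued because the virtqueue is full
is parked on `req_list` and resubmitted once `host_ack` frees a descriptor and wakes
it up; any other failure of `virtqueue_add_sgs` is reported to the caller as -EIO.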

Let me know if it looks reasonable to you.

Thank you,
Jakub Staron