Re: [RFC PATCH v2 2/8] vhost/vsock: rework packet allocation logic

From: Arseniy Krasnov
Date: Thu Jun 09 2022 - 08:10:07 EST


On 09.06.2022 11:38, Stefano Garzarella wrote:
> On Fri, Jun 03, 2022 at 05:33:04AM +0000, Arseniy Krasnov wrote:
>> For packets received from virtio RX queue, use buddy
>> allocator instead of 'kmalloc()' to be able to insert
>> such pages to user provided vma. Single call to
>> 'copy_from_iter()' replaced with per-page loop.
>>
>> Signed-off-by: Arseniy Krasnov <AVKrasnov@xxxxxxxxxxxxxx>
>> ---
>> drivers/vhost/vsock.c | 81 ++++++++++++++++++++++++++++++++++++-------
>> 1 file changed, 69 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
>> index e6c9d41db1de..0dc2229f18f7 100644
>> --- a/drivers/vhost/vsock.c
>> +++ b/drivers/vhost/vsock.c
>> @@ -58,6 +58,7 @@ struct vhost_vsock {
>>
>>     u32 guest_cid;
>>     bool seqpacket_allow;
>> +    bool zerocopy_rx_on;
>
> This is per-device, so a single socket can change the behaviour of all the sockets of this device.

Sure, my mistake

>
> Can we do something better?
>
> Maybe we can allocate the header, copy it, find the socket and check if zero-copy is enabled or not for that socket.
>
> Of course we should change or extend virtio_transport_recv_pkt() to avoid to find the socket again.

I think yes

>
>
>> };
>>
>> static u32 vhost_transport_get_local_cid(void)
>> @@ -357,6 +358,7 @@ vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
>>               unsigned int out, unsigned int in)
>> {
>>     struct virtio_vsock_pkt *pkt;
>> +    struct vhost_vsock *vsock;
>>     struct iov_iter iov_iter;
>>     size_t nbytes;
>>     size_t len;
>> @@ -393,20 +395,75 @@ vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
>>         return NULL;
>>     }
>>
>> -    pkt->buf = kmalloc(pkt->len, GFP_KERNEL);
>> -    if (!pkt->buf) {
>> -        kfree(pkt);
>> -        return NULL;
>> -    }
>> -
>>     pkt->buf_len = pkt->len;
>> +    vsock = container_of(vq->dev, struct vhost_vsock, dev);
>>
>> -    nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter);
>> -    if (nbytes != pkt->len) {
>> -        vq_err(vq, "Expected %u byte payload, got %zu bytes\n",
>> -               pkt->len, nbytes);
>> -        virtio_transport_free_pkt(pkt);
>> -        return NULL;
>> +    if (!vsock->zerocopy_rx_on) {
>> +        pkt->buf = kmalloc(pkt->len, GFP_KERNEL);
>> +
>> +        if (!pkt->buf) {
>> +            kfree(pkt);
>> +            return NULL;
>> +        }
>> +
>> +        pkt->slab_buf = true;
>> +        nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter);
>> +        if (nbytes != pkt->len) {
>> +            vq_err(vq, "Expected %u byte payload, got %zu bytes\n",
>> +                pkt->len, nbytes);
>> +            virtio_transport_free_pkt(pkt);
>> +            return NULL;
>> +        }
>> +    } else {
>> +        struct page *buf_page;
>> +        ssize_t pkt_len;
>> +        int page_idx;
>> +
>> +        /* This creates memory overrun, as we allocate
>> +         * at least one page for each packet.
>> +         */
>> +        buf_page = alloc_pages(GFP_KERNEL, get_order(pkt->len));
>> +
>> +        if (buf_page == NULL) {
>> +            kfree(pkt);
>> +            return NULL;
>> +        }
>> +
>> +        pkt->buf = page_to_virt(buf_page);
>> +
>> +        page_idx = 0;
>> +        pkt_len = pkt->len;
>> +
>> +        /* As allocated pages are not mapped, process
>> +         * pages one by one.
>> +         */
>> +        while (pkt_len > 0) {
>> +            void *mapped;
>> +            size_t to_copy;
>> +
>> +            mapped = kmap(buf_page + page_idx);
>> +
>> +            if (mapped == NULL) {
>> +                virtio_transport_free_pkt(pkt);
>> +                return NULL;
>> +            }
>> +
>> +            to_copy = min(pkt_len, ((ssize_t)PAGE_SIZE));
>> +
>> +            nbytes = copy_from_iter(mapped, to_copy, &iov_iter);
>> +            if (nbytes != to_copy) {
>> +                vq_err(vq, "Expected %zu byte payload, got %zu bytes\n",
>> +                       to_copy, nbytes);
>> +                kunmap(mapped);
>> +                virtio_transport_free_pkt(pkt);
>> +                return NULL;
>> +            }
>> +
>> +            kunmap(mapped);
>> +
>> +            pkt_len -= to_copy;
>> +            page_idx++;
>> +        }
>>     }
>>
>>     return pkt;
>> -- 
>> 2.25.1
>