Re: [PATCH v2 4/8] vringh: support VA with iotlb

From: Eugenio Perez Martin
Date: Fri Mar 17 2023 - 05:51:05 EST


On Thu, Mar 16, 2023 at 5:07 PM Stefano Garzarella <sgarzare@xxxxxxxxxx> wrote:
>
> On Fri, Mar 3, 2023 at 3:39 PM Eugenio Perez Martin <eperezma@xxxxxxxxxx> wrote:
> >
> > On Thu, Mar 2, 2023 at 12:35 PM Stefano Garzarella <sgarzare@xxxxxxxxxx> wrote:
> > >
> > > vDPA supports the possibility to use user VA in the iotlb messages.
> > > So, let's add support for user VA in vringh to use it in the vDPA
> > > simulators.
> > >
> > > Signed-off-by: Stefano Garzarella <sgarzare@xxxxxxxxxx>
> > > ---
> > >
> > > Notes:
> > > v2:
> > > - replace kmap_atomic() with kmap_local_page() [see previous patch]
> > > - fix cast warnings when building with W=1 C=1
> > >
> > > include/linux/vringh.h | 5 +-
> > > drivers/vdpa/mlx5/net/mlx5_vnet.c | 2 +-
> > > drivers/vdpa/vdpa_sim/vdpa_sim.c | 4 +-
> > > drivers/vhost/vringh.c | 247 ++++++++++++++++++++++++------
> > > 4 files changed, 205 insertions(+), 53 deletions(-)
> > >
>
> [...]
>
> >
> > It seems to me iotlb_translate_va and iotlb_translate_pa are very
> > similar; their only difference is that the iov argument is an
> > iovec instead of a bio_vec. And how to fill it, obviously.
> >
> > It would be great to merge both functions, only differing with a
> > conditional on vrh->use_va, or generics, or similar. Or, if following
> > the style of the rest of vringh code, to provide a callback to fill
> > iovec (although I like conditional more).
> >
> > However I cannot think of an easy way to perform that without long
> > macros or type erasure.
>
> Thank you for pushing me :-)
> I finally managed to avoid code duplication (partial patch attached,
> but not yet fully tested).
>
> @Jason: with this refactoring I removed copy_to_va/copy_to_pa, so I
> also avoided getu16_iotlb_va/pa.
>
> I will send the full patch in v3, but I would like to get your opinion
> first ;-)
>
>
>
> diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
> index 0ba3ef809e48..71dd67700e36 100644
> --- a/drivers/vhost/vringh.c
> +++ b/drivers/vhost/vringh.c
> @@ -1096,8 +1096,7 @@ EXPORT_SYMBOL(vringh_need_notify_kern);
>
> static int iotlb_translate(const struct vringh *vrh,
> u64 addr, u64 len, u64 *translated,
> - struct bio_vec iov[],
> - int iov_size, u32 perm)
> + void *iov, int iov_size, bool iovec, u32 perm)

I think this is an improvement, but we're doing type erasure here. I
don't think it is a big deal, since the function is not exported and is
pretty contained in this file, so I'd ack this version too. I'm just
throwing ideas here:

a) typedef the union {iovec, bio_vec} and use that type in the parameter.

As a drawback, that union feels out of place in this file. Is this the
only place where it is needed? I don't see other similar uses in the
kernel.

b) Convert from iov to bio_vec at return.
The drawback is the extra processing if the compiler is not smart
enough to inline it. I prefer the previous option, but I didn't want to
omit this one, just in case.

Thanks!

> {
> struct vhost_iotlb_map *map;
> struct vhost_iotlb *iotlb = vrh->iotlb;
> @@ -1107,7 +1106,7 @@ static int iotlb_translate(const struct vringh *vrh,
> spin_lock(vrh->iotlb_lock);
>
> while (len > s) {
> - u64 size, pa, pfn;
> + u64 size;
>
> if (unlikely(ret >= iov_size)) {
> ret = -ENOBUFS;
> @@ -1124,10 +1123,22 @@ static int iotlb_translate(const struct vringh *vrh,
> }
>
> size = map->size - addr + map->start;
> - pa = map->addr + addr - map->start;
> - pfn = pa >> PAGE_SHIFT;
> - bvec_set_page(&iov[ret], pfn_to_page(pfn), min(len - s, size),
> - pa & (PAGE_SIZE - 1));
> + if (iovec) {
> + struct iovec *iovec = iov;
> +
> + iovec[ret].iov_len = min(len - s, size);
> + iovec[ret].iov_base = (void __user *)(unsigned long)
> + (map->addr + addr - map->start);
> + } else {
> + u64 pa = map->addr + addr - map->start;
> + u64 pfn = pa >> PAGE_SHIFT;
> + struct bio_vec *bvec = iov;
> +
> + bvec_set_page(&bvec[ret], pfn_to_page(pfn),
> + min(len - s, size),
> + pa & (PAGE_SIZE - 1));
> + }
> +
> s += size;
> addr += size;
> ++ret;
> @@ -1141,26 +1152,38 @@ static int iotlb_translate(const struct vringh *vrh,
> return ret;
> }
>
> +#define IOTLB_IOV_SIZE 16
> +
> static inline int copy_from_iotlb(const struct vringh *vrh, void *dst,
> void *src, size_t len)
> {
> u64 total_translated = 0;
>
> while (total_translated < len) {
> - struct bio_vec iov[16];
> + union {
> + struct iovec iovec[IOTLB_IOV_SIZE];
> + struct bio_vec bvec[IOTLB_IOV_SIZE];
> + } iov;
> struct iov_iter iter;
> u64 translated;
> int ret;
>
> ret = iotlb_translate(vrh, (u64)(uintptr_t)src,
> len - total_translated, &translated,
> - iov, ARRAY_SIZE(iov), VHOST_MAP_RO);
> + &iov, IOTLB_IOV_SIZE, vrh->use_va,
> + VHOST_MAP_RO);
> if (ret == -ENOBUFS)
> - ret = ARRAY_SIZE(iov);
> + ret = IOTLB_IOV_SIZE;
> else if (ret < 0)
> return ret;
>
> - iov_iter_bvec(&iter, ITER_SOURCE, iov, ret, translated);
> + if (vrh->use_va) {
> + iov_iter_init(&iter, ITER_SOURCE, iov.iovec, ret,
> + translated);
> + } else {
> + iov_iter_bvec(&iter, ITER_SOURCE, iov.bvec, ret,
> + translated);
> + }
>
> ret = copy_from_iter(dst, translated, &iter);
> if (ret < 0)
> @@ -1180,20 +1203,30 @@ static inline int copy_to_iotlb(const struct vringh *vrh, void *dst,
> u64 total_translated = 0;
>
> while (total_translated < len) {
> - struct bio_vec iov[16];
> + union {
> + struct iovec iovec[IOTLB_IOV_SIZE];
> + struct bio_vec bvec[IOTLB_IOV_SIZE];
> + } iov;
> struct iov_iter iter;
> u64 translated;
> int ret;
>
> ret = iotlb_translate(vrh, (u64)(uintptr_t)dst,
> len - total_translated, &translated,
> - iov, ARRAY_SIZE(iov), VHOST_MAP_WO);
> + &iov, IOTLB_IOV_SIZE, vrh->use_va,
> + VHOST_MAP_WO);
> if (ret == -ENOBUFS)
> - ret = ARRAY_SIZE(iov);
> + ret = IOTLB_IOV_SIZE;
> else if (ret < 0)
> return ret;
>
> - iov_iter_bvec(&iter, ITER_DEST, iov, ret, translated);
> + if (vrh->use_va) {
> + iov_iter_init(&iter, ITER_DEST, iov.iovec, ret,
> + translated);
> + } else {
> + iov_iter_bvec(&iter, ITER_DEST, iov.bvec, ret,
> + translated);
> + }
>
> ret = copy_to_iter(src, translated, &iter);
> if (ret < 0)
> @@ -1210,20 +1243,32 @@ static inline int copy_to_iotlb(const struct vringh *vrh, void *dst,
> static inline int getu16_iotlb(const struct vringh *vrh,
> u16 *val, const __virtio16 *p)
> {
> - struct bio_vec iov;
> - void *kaddr, *from;
> + union {
> + struct iovec iovec;
> + struct bio_vec bvec;
> + } iov;
> + __virtio16 tmp;
> int ret;
>
> /* Atomic read is needed for getu16 */
> - ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
> - &iov, 1, VHOST_MAP_RO);
> + ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p),
> + NULL, &iov, 1, vrh->use_va, VHOST_MAP_RO);
> if (ret < 0)
> return ret;
>
> - kaddr = kmap_local_page(iov.bv_page);
> - from = kaddr + iov.bv_offset;
> - *val = vringh16_to_cpu(vrh, READ_ONCE(*(__virtio16 *)from));
> - kunmap_local(kaddr);
> + if (vrh->use_va) {
> + ret = __get_user(tmp, (__virtio16 __user *)iov.iovec.iov_base);
> + if (ret)
> + return ret;
> + } else {
> + void *kaddr = kmap_local_page(iov.bvec.bv_page);
> + void *from = kaddr + iov.bvec.bv_offset;
> +
> + tmp = READ_ONCE(*(__virtio16 *)from);
> + kunmap_local(kaddr);
> + }
> +
> + *val = vringh16_to_cpu(vrh, tmp);
>
> return 0;
> }
> @@ -1231,20 +1276,32 @@ static inline int getu16_iotlb(const struct vringh *vrh,
> static inline int putu16_iotlb(const struct vringh *vrh,
> __virtio16 *p, u16 val)
> {
> - struct bio_vec iov;
> - void *kaddr, *to;
> + union {
> + struct iovec iovec;
> + struct bio_vec bvec;
> + } iov;
> + __virtio16 tmp;
> int ret;
>
> /* Atomic write is needed for putu16 */
> - ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
> - &iov, 1, VHOST_MAP_WO);
> + ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p),
> + NULL, &iov, 1, vrh->use_va, VHOST_MAP_RO);
> if (ret < 0)
> return ret;
>
> - kaddr = kmap_local_page(iov.bv_page);
> - to = kaddr + iov.bv_offset;
> - WRITE_ONCE(*(__virtio16 *)to, cpu_to_vringh16(vrh, val));
> - kunmap_local(kaddr);
> + tmp = cpu_to_vringh16(vrh, val);
> +
> + if (vrh->use_va) {
> + ret = __put_user(tmp, (__virtio16 __user *)iov.iovec.iov_base);
> + if (ret)
> + return ret;
> + } else {
> + void *kaddr = kmap_local_page(iov.bvec.bv_page);
> + void *to = kaddr + iov.bvec.bv_offset;
> +
> + WRITE_ONCE(*(__virtio16 *)to, tmp);
> + kunmap_local(kaddr);
> + }
>
> return 0;
> }
>