Re: [RFC PATCH v2] KVM: PPC: vfio kvm device: support spapr tce

From: Alex Williamson
Date: Tue Nov 05 2013 - 16:32:55 EST


On Tue, 2013-11-05 at 19:05 +1100, Alexey Kardashevskiy wrote:
> Signed-off-by: Alexey Kardashevskiy <aik@xxxxxxxxx>
> ---
>
> Changes:
> v2:
> * it does not try to introduce a realmode search function.
> Instead, liobn-to-iommu-group lookup is done by VFIO KVM device
> in virtual mode and the result (iommu_group pointer) is cached
> in kvm_arch so the realmode handlers do not use VFIO KVM device for that.
> And the iommu groups get released on KVM termination.
>
> I tried this, seems viable.
>
> Did not I miss anything? Thanks.

A commit message ;)

> ---
> arch/powerpc/include/asm/kvm_host.h | 3 ++
> arch/powerpc/kvm/Kconfig | 1 +
> arch/powerpc/kvm/Makefile | 3 ++
> include/linux/vfio.h | 3 ++
> include/uapi/linux/kvm.h | 1 +
> virt/kvm/vfio.c | 74 +++++++++++++++++++++++++++++++++++++
> 6 files changed, 85 insertions(+)
>
> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
> index 48dbe8b..e1163d7 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -293,6 +293,9 @@ struct kvm_arch {
> #ifdef CONFIG_KVM_XICS
> struct kvmppc_xics *xics;
> #endif
> +#ifdef CONFIG_KVM_VFIO
> + struct kvm_vfio *vfio;
> +#endif
> };
>
> /*
> diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
> index 61b3535..d1b7f64 100644
> --- a/arch/powerpc/kvm/Kconfig
> +++ b/arch/powerpc/kvm/Kconfig
> @@ -60,6 +60,7 @@ config KVM_BOOK3S_64
> select KVM_BOOK3S_64_HANDLER
> select KVM
> select SPAPR_TCE_IOMMU
> + select KVM_VFIO
> ---help---
> Support running unmodified book3s_64 and book3s_32 guest kernels
> in virtual machines on book3s_64 host processors.
> diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
> index 6646c95..2438d2e 100644
> --- a/arch/powerpc/kvm/Makefile
> +++ b/arch/powerpc/kvm/Makefile
> @@ -87,6 +87,9 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
> kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
> book3s_xics.o
>
> +kvm-book3s_64-objs-$(CONFIG_KVM_VFIO) += \
> + $(KVM)/vfio.o \
> +
> kvm-book3s_64-module-objs := \
> $(KVM)/kvm_main.o \
> $(KVM)/eventfd.o \
> diff --git a/include/linux/vfio.h b/include/linux/vfio.h
> index 24579a0..681e19b 100644
> --- a/include/linux/vfio.h
> +++ b/include/linux/vfio.h
> @@ -97,4 +97,7 @@ extern struct vfio_group *vfio_group_get_external_user(struct file *filep);
> extern void vfio_group_put_external_user(struct vfio_group *group);
> extern int vfio_external_user_iommu_id(struct vfio_group *group);
>
> +extern struct iommu_group *vfio_find_group_by_liobn(struct kvm *kvm,
> + unsigned long liobn);
> +

Nope, this doesn't go in vfio.h, it's a function provided by kvm. It
should be named as such too, kvm_vfio_... It also depends on both
CONFIG_KVM_VFIO and CONFIG_SPAPR_TCE_IOMMU and needs stub version
otherwise. Is just _liobn specific enough or does it need a spapr_tce
thrown in to avoid confusion with embedded ppc folks?

> #endif /* VFIO_H */
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 7c1a349..a74ad16 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -847,6 +847,7 @@ struct kvm_device_attr {
> #define KVM_DEV_VFIO_GROUP 1
> #define KVM_DEV_VFIO_GROUP_ADD 1
> #define KVM_DEV_VFIO_GROUP_DEL 2
> +#define KVM_DEV_VFIO_SPAPR_TCE_LIOBN 2

I wonder if it would be better architecturally if this was an attribute
rather than a new group, ex:

#define KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE_LIOBN 3

It's a mouthful, but we are setting an attribute of a VFIO group, so it
makes sense. kvm_device_attr.addr would then need to point to a struct
containing both the fd and liobn.

Whatever we come up with need a documentation addition in
Documentation/virtual/kvm/devices/vfio.txt.

>
> /*
> * ioctls for VM fds
> diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
> index ca4260e..f9271d5 100644
> --- a/virt/kvm/vfio.c
> +++ b/virt/kvm/vfio.c
> @@ -22,6 +22,9 @@
> struct kvm_vfio_group {
> struct list_head node;
> struct vfio_group *vfio_group;
> +#ifdef CONFIG_SPAPR_TCE_IOMMU
> + uint64_t liobn;

Why is liobn an unsigned long in the exported function but a uint64_t
here?

> +#endif
> };
>
> struct kvm_vfio {
> @@ -188,12 +191,76 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
> return -ENXIO;
> }
>
> +#ifdef CONFIG_SPAPR_TCE_IOMMU
> +static int kvm_vfio_set_spapr_tce_liobn(struct kvm_device *dev,
> + long attr, u64 arg)
> +{
> + struct kvm_vfio *kv = dev->private;
> + struct vfio_group *vfio_group;
> + struct kvm_vfio_group *kvg;
> + void __user *argp = (void __user *)arg;
> + struct fd f;
> + int32_t fd;
> + uint64_t liobn = attr;
> +
> + if (get_user(fd, (int32_t __user *)argp))
> + return -EFAULT;
> +
> + f = fdget(fd);
> + if (!f.file)
> + return -EBADF;
> +
> + vfio_group = kvm_vfio_group_get_external_user(f.file);
> + fdput(f);


Not sure why you dropped this from the example of kvm_vfio_set_group:

if (IS_ERR(vfio_group))
return PTR_ERR(vfio_group);

You're also ignoring kv->lock.

> +
> + list_for_each_entry(kvg, &kv->group_list, node) {
> + if (kvg->vfio_group == vfio_group) {
> + WARN_ON(kvg->liobn);

Userspace should not be able to trigger a WARN this easily. Return
EBUSY if it's an error, otherwise let it go.

> + kvg->liobn = liobn;
> + kvm_vfio_group_put_external_user(vfio_group);
> + return 0;
> + }
> + }
> +
> + kvm_vfio_group_put_external_user(vfio_group);
> +
> + return -ENXIO;
> +}
> +
> +struct iommu_group *vfio_find_group_by_liobn(struct kvm *kvm,
> + unsigned long liobn)

As mentioned, kvm_vfio_...

> +{
> + struct kvm_vfio_group *kvg;
> +
> + if (!kvm->arch.vfio)
> + return NULL;

If this is already a slow path you can avoid stashing this pointer and
search kvm->devices for the matching ops struct.


kv->lock...

> +
> + list_for_each_entry(kvg, &kvm->arch.vfio->group_list, node) {
> + if (kvg->liobn == liobn) {
> + int group_id = vfio_external_user_iommu_id(
> + kvg->vfio_group);
> + struct iommu_group *grp =
> + iommu_group_get_by_id(group_id);

nit, ugly line wrapping. Where's the iommu_group_put() done?

> + return grp;

So you've now got an liobn to iommu_group mapping cached away somewhere,
what happens when a group is removed? Would it be invalid for userspace
to re-use the liobn? Do we need a way to invalidate your cached entry
and perhaps do an iommu_group_put()?

This version is actually plausible, so big improvement from v1! Thanks,

Alex

> + }
> + }
> +
> + return NULL;
> +}
> +EXPORT_SYMBOL_GPL(vfio_find_group_by_liobn);
> +#endif
> +
> static int kvm_vfio_set_attr(struct kvm_device *dev,
> struct kvm_device_attr *attr)
> {
> switch (attr->group) {
> case KVM_DEV_VFIO_GROUP:
> return kvm_vfio_set_group(dev, attr->attr, attr->addr);
> +#ifdef CONFIG_SPAPR_TCE_IOMMU
> + case KVM_DEV_VFIO_SPAPR_TCE_LIOBN:
> + return kvm_vfio_set_spapr_tce_liobn(dev, attr->attr,
> + attr->addr);
> +#endif
> }
>
> return -ENXIO;
> @@ -211,6 +278,10 @@ static int kvm_vfio_has_attr(struct kvm_device *dev,
> }
>
> break;
> +#ifdef CONFIG_SPAPR_TCE_IOMMU
> + case KVM_DEV_VFIO_SPAPR_TCE_LIOBN:
> + return 0;
> +#endif
> }
>
> return -ENXIO;
> @@ -251,6 +322,9 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type)
> mutex_init(&kv->lock);
>
> dev->private = kv;
> +#ifdef CONFIG_SPAPR_TCE_IOMMU
> + dev->kvm->arch.vfio = kv;
> +#endif
>
> return 0;
> }



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/