Re: [PATCH v3 19/22] kvm: x86: Get/set expanded xstate buffer

From: Sean Christopherson
Date: Tue Dec 28 2021 - 19:38:42 EST


Shortlog needs to have a verb somewhere.

On Wed, Dec 22, 2021, Jing Liu wrote:
> From: Guang Zeng <guang.zeng@xxxxxxxxx>
>
> When AMX is enabled it requires a larger xstate buffer than
> the legacy hardcoded 4KB one. Exising kvm ioctls

Existing

> (KVM_[G|S]ET_XSAVE under KVM_CAP_XSAVE) are not suitable for
> this purpose.

...

> Reuse KVM_SET_XSAVE for both old/new formats by reimplementing it to
> do properly-sized memdup_user() based on the guest fpu container.

I'm confused, the first sentence says KVM_SET_XSAVE isn't suitable, the second
says it can be reused with minimal effort.

> Also, update the api doc with the new KVM_GET_XSAVE2 ioctl.

...

> @@ -5367,7 +5382,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
> break;
> }
> case KVM_SET_XSAVE: {
> - u.xsave = memdup_user(argp, sizeof(*u.xsave));
> + int size = vcpu->arch.guest_fpu.uabi_size;

IIUC, reusing KVM_SET_XSAVE works by requiring that userspace use KVM_GET_XSAVE2
if userspace has expanded the guest FPU size by exposing relevant features to
the guest via guest CPUID. If so, then that needs to be enforced in KVM_GET_XSAVE,
otherwise userspace will get subtle corruption by invoking the wrong ioctl, e.g.

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2c9606380bca..5d2acbd52df5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5386,6 +5386,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
break;
}
case KVM_GET_XSAVE: {
+ r -EINVAL;
+ if (vcpu->arch.guest_fpu.uabi_size > sizeof(struct kvm_xsave))
+ break;
+
u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL_ACCOUNT);
r = -ENOMEM;
if (!u.xsave)

> +
> + u.xsave = memdup_user(argp, size);
> if (IS_ERR(u.xsave)) {
> r = PTR_ERR(u.xsave);
> goto out_nofree;
> @@ -5376,6 +5393,26 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
> r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
> break;
> }
> +
> + case KVM_GET_XSAVE2: {
> + int size = vcpu->arch.guest_fpu.uabi_size;
> +
> + u.xsave = kzalloc(size, GFP_KERNEL_ACCOUNT);
> + if (!u.xsave) {
> + r = -ENOMEM;

I hate the odd patterns in this code as much as anyone, but for better or worse
the style throughout is:

r = -ENOMEM;
u.xsave = kzalloc(size, GFP_KERNEL_ACCOUNT);
if (u.xsave)
break;

> + break;
> + }
> +
> + kvm_vcpu_ioctl_x86_get_xsave2(vcpu, u.buffer, size);
> +
> + if (copy_to_user(argp, u.xsave, size)) {
> + r = -EFAULT;
> + break;

Same style thing here.

> + }
> + r = 0;
> + break;
> + }
> +
> case KVM_GET_XCRS: {
> u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL_ACCOUNT);
> r = -ENOMEM;
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 1daa45268de2..9d1c01669560 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -1131,6 +1131,7 @@ struct kvm_ppc_resize_hpt {
> #define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204
> #define KVM_CAP_ARM_MTE 205
> #define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206
> +#define KVM_CAP_XSAVE2 207
>
> #ifdef KVM_CAP_IRQ_ROUTING
>
> @@ -1610,6 +1611,9 @@ struct kvm_enc_region {
> #define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3)
> #define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4)
>
> +/* Available with KVM_CAP_XSAVE2 */
> +#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave)
> +
> struct kvm_s390_pv_sec_parm {
> __u64 origin;
> __u64 length;
> --
> 2.27.0
>