Re: [PATCH v2 3/3] KVM: VMX: enable guest access to LMCE related MSRs

From: Haozhong Zhang
Date: Thu Jun 16 2016 - 06:50:01 EST


On 06/16/16 12:04, Paolo Bonzini wrote:
>
>
> On 16/06/2016 08:05, Haozhong Zhang wrote:
> > From: Ashok Raj <ashok.raj@xxxxxxxxx>
> >
> > On Intel platforms, this patch adds LMCE to KVM MCE supported
> > capabilities and handles guest access to LMCE related MSRs.
> >
> > Signed-off-by: Ashok Raj <ashok.raj@xxxxxxxxx>
> > [Haozhong: macro KVM_MCE_CAP_SUPPORTED => variable kvm_mce_cap_supported
> > Only enable LMCE on Intel platform
> > Check MSR_IA32_FEATURE_CONTROL when handling guest
> > access to MSR_IA32_MCG_EXT_CTL]
> > Signed-off-by: Haozhong Zhang <haozhong.zhang@xxxxxxxxx>
> > ---
> > arch/x86/include/asm/kvm_host.h | 5 +++++
> > arch/x86/kvm/vmx.c | 36 +++++++++++++++++++++++++++++++++++-
> > arch/x86/kvm/x86.c | 15 +++++++++------
> > 3 files changed, 49 insertions(+), 7 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index e0fbe7e..75defa6 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -598,6 +598,7 @@ struct kvm_vcpu_arch {
> > u64 mcg_cap;
> > u64 mcg_status;
> > u64 mcg_ctl;
> > + u64 mcg_ext_ctl;
> > u64 *mce_banks;
> >
> > /* Cache MMIO info */
> > @@ -1005,6 +1006,8 @@ struct kvm_x86_ops {
> > int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
> > uint32_t guest_irq, bool set);
> > void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
> > +
> > + void (*setup_mce)(struct kvm_vcpu *vcpu);
> > };
> >
> > struct kvm_arch_async_pf {
> > @@ -1077,6 +1080,8 @@ extern u8 kvm_tsc_scaling_ratio_frac_bits;
> > /* maximum allowed value of TSC scaling ratio */
> > extern u64 kvm_max_tsc_scaling_ratio;
> >
> > +extern u64 kvm_mce_cap_supported;
> > +
> > enum emulation_result {
> > EMULATE_DONE, /* no further processing */
> > EMULATE_USER_EXIT, /* kvm_run ready for userspace exit */
> > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> > index 1dc89c5..42db42e 100644
> > --- a/arch/x86/kvm/vmx.c
> > +++ b/arch/x86/kvm/vmx.c
> > @@ -638,7 +638,7 @@ static struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
> > * feature_control_valid_bits_add/del(), so it's not included here.
> > */
> > #define FEATURE_CONTROL_MAX_VALID_BITS \
> > - FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX
> > + (FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX | FEATURE_CONTROL_LMCE)
> >
> > static void feature_control_valid_bits_add(struct kvm_vcpu *vcpu, uint64_t bits)
> > {
> > @@ -2905,6 +2905,15 @@ static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,
> > return valid_bits && !(val & ~valid_bits);
> > }
> >
> > +static inline bool vmx_mcg_ext_ctrl_msr_present(struct kvm_vcpu *vcpu,
> > + bool host_initiated)
> > +{
> > + return (vcpu->arch.mcg_cap & MCG_LMCE_P) &&
>
> Checking MCG_LMCE_P is unnecessary, because you cannot set
> FEATURE_CONTROL_LMCE unless MCG_LMCE_P is present.
>
> You can just inline this function in the callers, it's simpler.

I'll remove the MCG_LMCE_P check on the first line and inline the rest into the callers.

>
> > + (host_initiated ||
> > + (to_vmx(vcpu)->msr_ia32_feature_control &
> > + FEATURE_CONTROL_LMCE));
> > +}
> > +
> > /*
> > * Reads an msr value (of 'msr_index') into 'pdata'.
> > * Returns 0 on success, non-0 otherwise.
> > @@ -2946,6 +2955,12 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> > return 1;
> > msr_info->data = vmcs_read64(GUEST_BNDCFGS);
> > break;
> > + case MSR_IA32_MCG_EXT_CTL:
> > + if (!vmx_mcg_ext_ctrl_msr_present(vcpu,
> > + msr_info->host_initiated))
> > + return 1;
> > + msr_info->data = vcpu->arch.mcg_ext_ctl;
> > + break;
> > case MSR_IA32_FEATURE_CONTROL:
> > if (!vmx_feature_control_msr_valid(vcpu, 0))
> > return 1;
> > @@ -3039,6 +3054,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> > case MSR_IA32_TSC_ADJUST:
> > ret = kvm_set_msr_common(vcpu, msr_info);
> > break;
> > + case MSR_IA32_MCG_EXT_CTL:
> > + if (!vmx_mcg_ext_ctrl_msr_present(vcpu,
> > + msr_info->host_initiated) ||
> > + (data & ~MCG_EXT_CTL_LMCE_EN))
> > + return 1;
> > + vcpu->arch.mcg_ext_ctl = data;
> > + break;
> > case MSR_IA32_FEATURE_CONTROL:
> > if (!vmx_feature_control_msr_valid(vcpu, data) ||
> > (to_vmx(vcpu)->msr_ia32_feature_control &
> > @@ -6433,6 +6455,8 @@ static __init int hardware_setup(void)
> >
> > kvm_set_posted_intr_wakeup_handler(wakeup_handler);
> >
> > + kvm_mce_cap_supported |= MCG_LMCE_P;
>
> Ah, so virtual LMCE is available on all processors! This is
> interesting, but it also makes it more complicated to handle in QEMU; a
> new QEMU generally doesn't require a new kernel.
>

My QEMU patch checks the KVM MCE capabilities before enabling LMCE (see
lmce_supported() and mce_init() in QEMU patch 1), so both a new QEMU
on an old kernel and an old QEMU on a new kernel will work.

Haozhong

> Eduardo, any ideas?
>
> Thanks,
>
> Paolo
>
> > return alloc_kvm_area();
> >
> > out8:
> > @@ -10950,6 +10974,14 @@ out:
> > return ret;
> > }
> >
> > +static void vmx_setup_mce(struct kvm_vcpu *vcpu)
> > +{
> > + if (vcpu->arch.mcg_cap & MCG_LMCE_P)
> > + feature_control_valid_bits_add(vcpu, FEATURE_CONTROL_LMCE);
> > + else
> > + feature_control_valid_bits_del(vcpu, FEATURE_CONTROL_LMCE);
> > +}
> > +
> > static struct kvm_x86_ops vmx_x86_ops = {
> > .cpu_has_kvm_support = cpu_has_kvm_support,
> > .disabled_by_bios = vmx_disabled_by_bios,
> > @@ -11074,6 +11106,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
> > .pmu_ops = &intel_pmu_ops,
> >
> > .update_pi_irte = vmx_update_pi_irte,
> > +
> > + .setup_mce = vmx_setup_mce,
> > };
> >
> > static int __init vmx_init(void)
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index bf22721..5bf76ab 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -70,7 +70,8 @@
> >
> > #define MAX_IO_MSRS 256
> > #define KVM_MAX_MCE_BANKS 32
> > -#define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P)
> > +u64 __read_mostly kvm_mce_cap_supported = MCG_CTL_P | MCG_SER_P;
> > +EXPORT_SYMBOL_GPL(kvm_mce_cap_supported);
> >
> > #define emul_to_vcpu(ctxt) \
> > container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt)
> > @@ -983,6 +984,7 @@ static u32 emulated_msrs[] = {
> > MSR_IA32_MISC_ENABLE,
> > MSR_IA32_MCG_STATUS,
> > MSR_IA32_MCG_CTL,
> > + MSR_IA32_MCG_EXT_CTL,
> > MSR_IA32_SMBASE,
> > };
> >
> > @@ -2684,11 +2686,9 @@ long kvm_arch_dev_ioctl(struct file *filp,
> > break;
> > }
> > case KVM_X86_GET_MCE_CAP_SUPPORTED: {
> > - u64 mce_cap;
> > -
> > - mce_cap = KVM_MCE_CAP_SUPPORTED;
> > r = -EFAULT;
> > - if (copy_to_user(argp, &mce_cap, sizeof mce_cap))
> > + if (copy_to_user(argp, &kvm_mce_cap_supported,
> > + sizeof(kvm_mce_cap_supported)))
> > goto out;
> > r = 0;
> > break;
> > @@ -2866,7 +2866,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
> > r = -EINVAL;
> > if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
> > goto out;
> > - if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000))
> > + if (mcg_cap & ~(kvm_mce_cap_supported | 0xff | 0xff0000))
> > goto out;
> > r = 0;
> > vcpu->arch.mcg_cap = mcg_cap;
> > @@ -2876,6 +2876,9 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
> > /* Init IA32_MCi_CTL to all 1s */
> > for (bank = 0; bank < bank_num; bank++)
> > vcpu->arch.mce_banks[bank*4] = ~(u64)0;
> > +
> > + if (kvm_x86_ops->setup_mce)
> > + kvm_x86_ops->setup_mce(vcpu);
> > out:
> > return r;
> > }
> >