Re: [PATCH 08/15] KVM: VMX: Configure list of user return MSRs at module init

From: Maxim Levitsky
Date: Mon May 10 2021 - 04:23:41 EST


On Tue, 2021-05-04 at 10:17 -0700, Sean Christopherson wrote:
> Configure the list of user return MSRs that are actually supported at
> module init instead of reprobing the list of possible MSRs every time a
> vCPU is created. Curating the list on a per-vCPU basis is pointless; KVM
> is completely hosed if the set of supported MSRs changes after module init,
> or if the set of MSRs differs per physical CPU.
>
> The per-vCPU lists also increase complexity (see __vmx_find_uret_msr()) and
> create corner cases that _should_ be impossible, but theoretically exist
> in KVM, e.g. advertising RDTSCP to userspace without actually being able to
> virtualize RDTSCP if probing MSR_TSC_AUX fails.
>
> Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
> ---
> arch/x86/kvm/vmx/vmx.c | 61 ++++++++++++++++++++++++++++--------------
> arch/x86/kvm/vmx/vmx.h | 10 ++++++-
> 2 files changed, 50 insertions(+), 21 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 42e4bbaa299a..68454b0de2b1 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -461,7 +461,7 @@ static unsigned long host_idt_base;
> * support this emulation, IA32_STAR must always be included in
> * vmx_uret_msrs_list[], even in i386 builds.
> */
> -static const u32 vmx_uret_msrs_list[] = {
> +static u32 vmx_uret_msrs_list[] = {
> #ifdef CONFIG_X86_64
> MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
> #endif
> @@ -469,6 +469,12 @@ static const u32 vmx_uret_msrs_list[] = {
> MSR_IA32_TSX_CTRL,
> };
>
> +/*
> + * Number of user return MSRs that are actually supported in hardware.
> + * vmx_uret_msrs_list is modified when KVM is loaded to drop unsupported MSRs.
> + */
> +static int vmx_nr_uret_msrs;
> +
> #if IS_ENABLED(CONFIG_HYPERV)
> static bool __read_mostly enlightened_vmcs = true;
> module_param(enlightened_vmcs, bool, 0444);
> @@ -700,9 +706,16 @@ static inline int __vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
> {
> int i;
>
> - for (i = 0; i < vmx->nr_uret_msrs; ++i)
> + /*
> + * Note, vmx->guest_uret_msrs is the same size as vmx_uret_msrs_list,
> + * but is ordered differently. The MSR is matched against the list of
> + * supported uret MSRs using "slot", but the index that is returned is
> + * the index into guest_uret_msrs.
> + */
> + for (i = 0; i < vmx_nr_uret_msrs; ++i) {
> if (vmx_uret_msrs_list[vmx->guest_uret_msrs[i].slot] == msr)
> return i;
> + }
> return -1;
> }
>
> @@ -6929,18 +6942,10 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
> goto free_vpid;
> }
>
> - BUILD_BUG_ON(ARRAY_SIZE(vmx_uret_msrs_list) != MAX_NR_USER_RETURN_MSRS);
> + for (i = 0; i < vmx_nr_uret_msrs; ++i) {
> + vmx->guest_uret_msrs[i].data = 0;
>
> - for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i) {
> - u32 index = vmx_uret_msrs_list[i];
> - int j = vmx->nr_uret_msrs;
> -
> - if (kvm_probe_user_return_msr(index))
> - continue;
> -
> - vmx->guest_uret_msrs[j].slot = i;
I don't see anything initializing .slot after this patch.
This code is removed by a later patch in the series, which masks the bug,
but for the sake of bisection, I think that this patch
should still be fixed.

Other than this minor bug:

Reviewed-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx>

Best regards,
Maxim Levitsky



> - vmx->guest_uret_msrs[j].data = 0;
> - switch (index) {
> + switch (vmx_uret_msrs_list[i]) {
> case MSR_IA32_TSX_CTRL:
> /*
> * TSX_CTRL_CPUID_CLEAR is handled in the CPUID
> @@ -6954,15 +6959,14 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
> * host so that TSX remains always disabled.
> */
> if (boot_cpu_has(X86_FEATURE_RTM))
> - vmx->guest_uret_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
> + vmx->guest_uret_msrs[i].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
> else
> - vmx->guest_uret_msrs[j].mask = 0;
> + vmx->guest_uret_msrs[i].mask = 0;
> break;
> default:
> - vmx->guest_uret_msrs[j].mask = -1ull;
> + vmx->guest_uret_msrs[i].mask = -1ull;
> break;
> }
> - ++vmx->nr_uret_msrs;
> }
>
> err = alloc_loaded_vmcs(&vmx->vmcs01);
> @@ -7821,17 +7825,34 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
> .vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
> };
>
> +static __init void vmx_setup_user_return_msrs(void)
> +{
> + u32 msr;
> + int i;
> +
> + BUILD_BUG_ON(ARRAY_SIZE(vmx_uret_msrs_list) != MAX_NR_USER_RETURN_MSRS);
> +
> + for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i) {
> + msr = vmx_uret_msrs_list[i];
> +
> + if (kvm_probe_user_return_msr(msr))
> + continue;
> +
> + kvm_define_user_return_msr(vmx_nr_uret_msrs, msr);
> + vmx_uret_msrs_list[vmx_nr_uret_msrs++] = msr;
> + }
> +}
> +
> static __init int hardware_setup(void)
> {
> unsigned long host_bndcfgs;
> struct desc_ptr dt;
> - int r, i, ept_lpage_level;
> + int r, ept_lpage_level;
>
> store_idt(&dt);
> host_idt_base = dt.address;
>
> - for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i)
> - kvm_define_user_return_msr(i, vmx_uret_msrs_list[i]);
> + vmx_setup_user_return_msrs();
>
> if (setup_vmcs_config(&vmcs_config, &vmx_capability) < 0)
> return -EIO;
> diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
> index 008cb87ff088..d71ed8b425c5 100644
> --- a/arch/x86/kvm/vmx/vmx.h
> +++ b/arch/x86/kvm/vmx/vmx.h
> @@ -245,8 +245,16 @@ struct vcpu_vmx {
> u32 idt_vectoring_info;
> ulong rflags;
>
> + /*
> + * User return MSRs are always emulated when enabled in the guest, but
> + * only loaded into hardware when necessary, e.g. SYSCALL #UDs outside
> + * of 64-bit mode or if EFER.SCE=1, thus the SYSCALL MSRs don't need to
> + * be loaded into hardware if those conditions aren't met.
> + * nr_active_uret_msrs tracks the number of MSRs that need to be loaded
> + * into hardware when running the guest. guest_uret_msrs[] is re-sorted
> + * whenever the number of "active" uret MSRs is modified.
> + */
> struct vmx_uret_msr guest_uret_msrs[MAX_NR_USER_RETURN_MSRS];
> - int nr_uret_msrs;
> int nr_active_uret_msrs;
> bool guest_uret_msrs_loaded;
> #ifdef CONFIG_X86_64