Re: [PATCH 09/15] KVM: VMX: Use flag to indicate "active" uret MSRs instead of sorting list

From: Maxim Levitsky
Date: Mon May 10 2021 - 04:26:21 EST


On Tue, 2021-05-04 at 10:17 -0700, Sean Christopherson wrote:
> Explicitly flag a uret MSR as needing to be loaded into hardware instead of
> resorting the list of "active" MSRs and tracking how many MSRs in total
> need to be loaded. The only benefit to sorting the list is that the loop
> to load MSRs during vmx_prepare_switch_to_guest() doesn't need to iterate
> over all supported uret MRS, only those that are active. But that is a
> pointless optimization, as the most common case, running a 64-bit guest,
> will load the vast majority of MSRs. Not to mention that a single WRMSR is
> far more expensive than iterating over the list.
>
> Providing a stable list order obviates the need to track a given MSR's
> "slot" in the per-CPU list of user return MSRs; all lists simply use the
> same ordering. Future patches will take advantage of the stable order to
> further simplify the related code.

>
> Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
> ---
> arch/x86/kvm/vmx/vmx.c | 80 ++++++++++++++++++++++--------------------
> arch/x86/kvm/vmx/vmx.h | 2 +-
> 2 files changed, 42 insertions(+), 40 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 68454b0de2b1..6caabcd5037e 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -458,8 +458,9 @@ static unsigned long host_idt_base;
> * Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm
> * will emulate SYSCALL in legacy mode if the vendor string in guest
> * CPUID.0:{EBX,ECX,EDX} is "AuthenticAMD" or "AMDisbetter!" To
> - * support this emulation, IA32_STAR must always be included in
> - * vmx_uret_msrs_list[], even in i386 builds.
> + * support this emulation, MSR_STAR is included in the list for i386,
> + * but is never loaded into hardware. MSR_CSTAR is also never loaded
> + * into hardware and is here purely for emulation purposes.
> */
> static u32 vmx_uret_msrs_list[] = {
> #ifdef CONFIG_X86_64
> @@ -702,18 +703,12 @@ static bool is_valid_passthrough_msr(u32 msr)
> return r;
> }
>
> -static inline int __vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
> +static inline int __vmx_find_uret_msr(u32 msr)
> {
> int i;
>
> - /*
> - * Note, vmx->guest_uret_msrs is the same size as vmx_uret_msrs_list,
> - * but is ordered differently. The MSR is matched against the list of
> - * supported uret MSRs using "slot", but the index that is returned is
> - * the index into guest_uret_msrs.
> - */
> for (i = 0; i < vmx_nr_uret_msrs; ++i) {
> - if (vmx_uret_msrs_list[vmx->guest_uret_msrs[i].slot] == msr)
> + if (vmx_uret_msrs_list[i] == msr)
> return i;
> }
> return -1;
> @@ -723,7 +718,7 @@ struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
> {
> int i;
>
> - i = __vmx_find_uret_msr(vmx, msr);
> + i = __vmx_find_uret_msr(msr);
> if (i >= 0)
> return &vmx->guest_uret_msrs[i];
> return NULL;
> @@ -732,13 +727,14 @@ struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
> static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,
> struct vmx_uret_msr *msr, u64 data)
> {
> + unsigned int slot = msr - vmx->guest_uret_msrs;
> int ret = 0;
>
> u64 old_msr_data = msr->data;
> msr->data = data;
> - if (msr - vmx->guest_uret_msrs < vmx->nr_active_uret_msrs) {
> + if (msr->load_into_hardware) {
> preempt_disable();
> - ret = kvm_set_user_return_msr(msr->slot, msr->data, msr->mask);
> + ret = kvm_set_user_return_msr(slot, msr->data, msr->mask);
> preempt_enable();
> if (ret)
> msr->data = old_msr_data;
> @@ -1090,7 +1086,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx)
> return false;
> }
>
> - i = __vmx_find_uret_msr(vmx, MSR_EFER);
> + i = __vmx_find_uret_msr(MSR_EFER);
> if (i < 0)
> return false;
>
> @@ -1252,11 +1248,14 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
> */
> if (!vmx->guest_uret_msrs_loaded) {
> vmx->guest_uret_msrs_loaded = true;
> - for (i = 0; i < vmx->nr_active_uret_msrs; ++i)
> - kvm_set_user_return_msr(vmx->guest_uret_msrs[i].slot,
> + for (i = 0; i < vmx_nr_uret_msrs; ++i) {
> + if (!vmx->guest_uret_msrs[i].load_into_hardware)
> + continue;
> +
> + kvm_set_user_return_msr(i,
> vmx->guest_uret_msrs[i].data,
> vmx->guest_uret_msrs[i].mask);
> -
> + }
> }
>
> if (vmx->nested.need_vmcs12_to_shadow_sync)
> @@ -1763,19 +1762,16 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
> vmx_clear_hlt(vcpu);
> }
>
> -static void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr)
> +static void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr,
> + bool load_into_hardware)
> {
> - struct vmx_uret_msr tmp;
> - int from, to;
> + struct vmx_uret_msr *uret_msr;
>
> - from = __vmx_find_uret_msr(vmx, msr);
> - if (from < 0)
> + uret_msr = vmx_find_uret_msr(vmx, msr);
> + if (!uret_msr)
> return;
> - to = vmx->nr_active_uret_msrs++;
>
> - tmp = vmx->guest_uret_msrs[to];
> - vmx->guest_uret_msrs[to] = vmx->guest_uret_msrs[from];
> - vmx->guest_uret_msrs[from] = tmp;
> + uret_msr->load_into_hardware = load_into_hardware;
> }
>
> /*
> @@ -1785,30 +1781,36 @@ static void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr)
> */
> static void setup_msrs(struct vcpu_vmx *vmx)
> {
> - vmx->guest_uret_msrs_loaded = false;
> - vmx->nr_active_uret_msrs = 0;
> #ifdef CONFIG_X86_64
> + bool load_syscall_msrs;
> +
> /*
> * The SYSCALL MSRs are only needed on long mode guests, and only
> * when EFER.SCE is set.
> */
> - if (is_long_mode(&vmx->vcpu) && (vmx->vcpu.arch.efer & EFER_SCE)) {
> - vmx_setup_uret_msr(vmx, MSR_STAR);
> - vmx_setup_uret_msr(vmx, MSR_LSTAR);
> - vmx_setup_uret_msr(vmx, MSR_SYSCALL_MASK);
> - }
> + load_syscall_msrs = is_long_mode(&vmx->vcpu) &&
> + (vmx->vcpu.arch.efer & EFER_SCE);
> +
> + vmx_setup_uret_msr(vmx, MSR_STAR, load_syscall_msrs);
> + vmx_setup_uret_msr(vmx, MSR_LSTAR, load_syscall_msrs);
> + vmx_setup_uret_msr(vmx, MSR_SYSCALL_MASK, load_syscall_msrs);
> #endif
> - if (update_transition_efer(vmx))
> - vmx_setup_uret_msr(vmx, MSR_EFER);
> + vmx_setup_uret_msr(vmx, MSR_EFER, update_transition_efer(vmx));
>
> - if (guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP) ||
> - guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDPID))
> - vmx_setup_uret_msr(vmx, MSR_TSC_AUX);
> + vmx_setup_uret_msr(vmx, MSR_TSC_AUX,
> + guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP) ||
> + guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDPID));
>
> - vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL);
> + vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL, true);
>
> if (cpu_has_vmx_msr_bitmap())
> vmx_update_msr_bitmap(&vmx->vcpu);
> +
> + /*
> + * The set of MSRs to load may have changed, reload MSRs before the
> + * next VM-Enter.
> + */
> + vmx->guest_uret_msrs_loaded = false;
> }
>
> static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
> diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
> index d71ed8b425c5..16e4e457ba23 100644
> --- a/arch/x86/kvm/vmx/vmx.h
> +++ b/arch/x86/kvm/vmx/vmx.h
> @@ -36,7 +36,7 @@ struct vmx_msrs {
> };
>
> struct vmx_uret_msr {
> - unsigned int slot; /* The MSR's slot in kvm_user_return_msrs. */
> + bool load_into_hardware;
> u64 data;
> u64 mask;
> };

This is a very welcomed change, the old code was very complicated
to understand.

However I still feel that it would be nice to have a comment explaining
that vmx->guest_uret_msrs follow the same order now as the (eventually common)
uret msr list, and basically is a parallel array that extends the
percpu 'user_return_msrs' and the global kvm_uret_msrs_list arrays.

In fact why not to fold the vmx->guest_uret_msrs into the x86 common uret msr list?
There is nothing VMX specific in this list IMHO and SVM can use it as well,
in fact it has 'svm->tsc_aux' which is the 'data' field of a 'struct vmx_uret_msr'


Reviewed-by: Maxim Levitsky <mlevitsk@xxxxxxxxx>

Best regards,
Maxim Levitsky