[PATCH 08/15] KVM: VMX: Configure list of user return MSRs at module init

From: Sean Christopherson
Date: Tue May 04 2021 - 13:18:20 EST


Configure the list of user return MSRs that are actually supported at
module init instead of reprobing the list of possible MSRs every time a
vCPU is created. Curating the list on a per-vCPU basis is pointless; KVM
is completely hosed if the set of supported MSRs changes after module init,
or if the set of MSRs differs per physical PCU.

The per-vCPU lists also increase complexity (see __vmx_find_uret_msr()) and
creates corner cases that _should_ be impossible, but theoretically exist
in KVM, e.g. advertising RDTSCP to userspace without actually being able to
virtualize RDTSCP if probing MSR_TSC_AUX fails.

Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
---
arch/x86/kvm/vmx/vmx.c | 61 ++++++++++++++++++++++++++++--------------
arch/x86/kvm/vmx/vmx.h | 10 ++++++-
2 files changed, 50 insertions(+), 21 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 42e4bbaa299a..68454b0de2b1 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -461,7 +461,7 @@ static unsigned long host_idt_base;
* support this emulation, IA32_STAR must always be included in
* vmx_uret_msrs_list[], even in i386 builds.
*/
-static const u32 vmx_uret_msrs_list[] = {
+static u32 vmx_uret_msrs_list[] = {
#ifdef CONFIG_X86_64
MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
#endif
@@ -469,6 +469,12 @@ static const u32 vmx_uret_msrs_list[] = {
MSR_IA32_TSX_CTRL,
};

+/*
+ * Number of user return MSRs that are actually supported in hardware.
+ * vmx_uret_msrs_list is modified when KVM is loaded to drop unsupported MSRs.
+ */
+static int vmx_nr_uret_msrs;
+
#if IS_ENABLED(CONFIG_HYPERV)
static bool __read_mostly enlightened_vmcs = true;
module_param(enlightened_vmcs, bool, 0444);
@@ -700,9 +706,16 @@ static inline int __vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
{
int i;

- for (i = 0; i < vmx->nr_uret_msrs; ++i)
+ /*
+ * Note, vmx->guest_uret_msrs is the same size as vmx_uret_msrs_list,
+ * but is ordered differently. The MSR is matched against the list of
+ * supported uret MSRs using "slot", but the index that is returned is
+ * the index into guest_uret_msrs.
+ */
+ for (i = 0; i < vmx_nr_uret_msrs; ++i) {
if (vmx_uret_msrs_list[vmx->guest_uret_msrs[i].slot] == msr)
return i;
+ }
return -1;
}

@@ -6929,18 +6942,10 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
goto free_vpid;
}

- BUILD_BUG_ON(ARRAY_SIZE(vmx_uret_msrs_list) != MAX_NR_USER_RETURN_MSRS);
+ for (i = 0; i < vmx_nr_uret_msrs; ++i) {
+ vmx->guest_uret_msrs[i].data = 0;

- for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i) {
- u32 index = vmx_uret_msrs_list[i];
- int j = vmx->nr_uret_msrs;
-
- if (kvm_probe_user_return_msr(index))
- continue;
-
- vmx->guest_uret_msrs[j].slot = i;
- vmx->guest_uret_msrs[j].data = 0;
- switch (index) {
+ switch (vmx_uret_msrs_list[i]) {
case MSR_IA32_TSX_CTRL:
/*
* TSX_CTRL_CPUID_CLEAR is handled in the CPUID
@@ -6954,15 +6959,14 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
* host so that TSX remains always disabled.
*/
if (boot_cpu_has(X86_FEATURE_RTM))
- vmx->guest_uret_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
+ vmx->guest_uret_msrs[i].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
else
- vmx->guest_uret_msrs[j].mask = 0;
+ vmx->guest_uret_msrs[i].mask = 0;
break;
default:
- vmx->guest_uret_msrs[j].mask = -1ull;
+ vmx->guest_uret_msrs[i].mask = -1ull;
break;
}
- ++vmx->nr_uret_msrs;
}

err = alloc_loaded_vmcs(&vmx->vmcs01);
@@ -7821,17 +7825,34 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
};

+static __init void vmx_setup_user_return_msrs(void)
+{
+ u32 msr;
+ int i;
+
+ BUILD_BUG_ON(ARRAY_SIZE(vmx_uret_msrs_list) != MAX_NR_USER_RETURN_MSRS);
+
+ for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i) {
+ msr = vmx_uret_msrs_list[i];
+
+ if (kvm_probe_user_return_msr(msr))
+ continue;
+
+ kvm_define_user_return_msr(vmx_nr_uret_msrs, msr);
+ vmx_uret_msrs_list[vmx_nr_uret_msrs++] = msr;
+ }
+}
+
static __init int hardware_setup(void)
{
unsigned long host_bndcfgs;
struct desc_ptr dt;
- int r, i, ept_lpage_level;
+ int r, ept_lpage_level;

store_idt(&dt);
host_idt_base = dt.address;

- for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i)
- kvm_define_user_return_msr(i, vmx_uret_msrs_list[i]);
+ vmx_setup_user_return_msrs();

if (setup_vmcs_config(&vmcs_config, &vmx_capability) < 0)
return -EIO;
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 008cb87ff088..d71ed8b425c5 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -245,8 +245,16 @@ struct vcpu_vmx {
u32 idt_vectoring_info;
ulong rflags;

+ /*
+ * User return MSRs are always emulated when enabled in the guest, but
+ * only loaded into hardware when necessary, e.g. SYSCALL #UDs outside
+ * of 64-bit mode or if EFER.SCE=1, thus the SYSCALL MSRs don't need to
+ * be loaded into hardware if those conditions aren't met.
+ * nr_active_uret_msrs tracks the number of MSRs that need to be loaded
+ * into hardware when running the guest. guest_uret_msrs[] is resorted
+ * whenever the number of "active" uret MSRs is modified.
+ */
struct vmx_uret_msr guest_uret_msrs[MAX_NR_USER_RETURN_MSRS];
- int nr_uret_msrs;
int nr_active_uret_msrs;
bool guest_uret_msrs_loaded;
#ifdef CONFIG_X86_64
--
2.31.1.527.g47e6f16901-goog