[PATCH] Revert "KVM: nVMX: Expose load IA32_PERF_GLOBAL_CTRL VM-{Entry,Exit} control"

From: Paolo Bonzini
Date: Fri Jul 22 2022 - 06:43:45 EST


This reverts commit 03a8871add95213827e2bea84c12133ae5df952e.

Since commit 03a8871add95 ("KVM: nVMX: Expose load IA32_PERF_GLOBAL_CTRL
VM-{Entry,Exit} control"), KVM has taken ownership of the "load
IA32_PERF_GLOBAL_CTRL" VMX entry/exit control bits, trying to set these
bits in the IA32_VMX_TRUE_{ENTRY,EXIT}_CTLS MSRs if the guest's CPUID
supports the architectural PMU (CPUID[EAX=0Ah].EAX[7:0]=1), and clear
otherwise.

This was a misguided attempt at mimicking what commit 5f76f6f5ff96
("KVM: nVMX: Do not expose MPX VMX controls when guest MPX disabled",
2018-10-01) did for MPX. However, that commit was a workaround for
another KVM bug and not something that should be imitated. Mucking with
the VMX MSRs creates a subtle, difficult to maintain ABI as KVM must
ensure that any internal changes, e.g. to how KVM handles _any_ guest
CPUID changes, yield the same functional result. Therefore, KVM's policy
is to let userspace have full control of the guest vCPU model so long
as the host kernel is not at risk.

And that's the snag: setting the bit must not cause any harm to the host,
therefore we need to be sure that the kvm_set_msr will actually succeed.
Furthermore, it is plausible to have a hypervisor that sets the controls
unconditionally and just leaves GUEST/HOST_IA32_PERF_GLOBAL_CTRL to 0, and
we don't want to regress that case. The simplest way to handle
both issues is to skip the call to kvm_set_msr if the value of
MSR_CORE_PERF_GLOBAL_CTRL is not changing. This covers trivially the case
where the PMU is not available and the only acceptable value of the MSR is
zero, because nonzero values are filtered in nested_vmx_check_host_state
and nested_vmx_check_guest_state.

Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
---
arch/x86/kvm/vmx/nested.c | 26 +++-----------------------
arch/x86/kvm/vmx/nested.h | 2 --
arch/x86/kvm/vmx/pmu_intel.c | 3 ---
3 files changed, 3 insertions(+), 28 deletions(-)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index c1c85fd75d42..6d25de9ebefa 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2623,6 +2623,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
}

if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
+ vmcs12->guest_ia32_perf_global_ctrl != vcpu_to_pmu(vcpu)->global_ctrl &&
WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
vmcs12->guest_ia32_perf_global_ctrl))) {
*entry_failure_code = ENTRY_FAIL_DEFAULT;
@@ -4333,7 +4334,8 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);
vcpu->arch.pat = vmcs12->host_ia32_pat;
}
- if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
+ if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
+ && vmcs12->host_ia32_perf_global_ctrl != vcpu_to_pmu(vcpu)->global_ctrl)
WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
vmcs12->host_ia32_perf_global_ctrl));

@@ -4822,28 +4824,6 @@ int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
return 0;
}

-void nested_vmx_pmu_refresh(struct kvm_vcpu *vcpu,
- bool vcpu_has_perf_global_ctrl)
-{
- struct vcpu_vmx *vmx;
-
- if (!nested_vmx_allowed(vcpu))
- return;
-
- vmx = to_vmx(vcpu);
- if (vcpu_has_perf_global_ctrl) {
- vmx->nested.msrs.entry_ctls_high |=
- VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
- vmx->nested.msrs.exit_ctls_high |=
- VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
- } else {
- vmx->nested.msrs.entry_ctls_high &=
- ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
- vmx->nested.msrs.exit_ctls_high &=
- ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
- }
-}
-
static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer,
int *ret)
{
diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h
index 129ae4e01f7c..88b00a7359e4 100644
--- a/arch/x86/kvm/vmx/nested.h
+++ b/arch/x86/kvm/vmx/nested.h
@@ -32,8 +32,6 @@ int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata);
int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
u32 vmx_instruction_info, bool wr, int len, gva_t *ret);
-void nested_vmx_pmu_refresh(struct kvm_vcpu *vcpu,
- bool vcpu_has_perf_global_ctrl);
void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu);
bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port,
int size);
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 4bc098fbec31..cfcb590afaa7 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -590,9 +590,6 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
bitmap_set(pmu->all_valid_pmc_idx,
INTEL_PMC_MAX_GENERIC, pmu->nr_arch_fixed_counters);

- nested_vmx_pmu_refresh(vcpu,
- intel_is_valid_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL));
-
if (cpuid_model_is_consistent(vcpu))
x86_perf_get_lbr(&lbr_desc->records);
else
--
2.31.1