Re: [PATCH] KVM: x86: Making the module parameter of vPMU more common

From: Paolo Bonzini
Date: Mon Jan 17 2022 - 12:57:23 EST


On 1/11/22 08:38, Like Xu wrote:
From: Like Xu <likexu@xxxxxxxxxxx>

The new module parameter to control PMU virtualization should apply
to Intel as well as AMD, for situations where userspace is not trusted.
If the module parameter allows PMU virtualization, there could be a
new KVM_CAP or guest CPUID bits whereby userspace can enable/disable
PMU virtualization on a per-VM basis.

If the module parameter does not allow PMU virtualization, there
should be no userspace override, since we have no precedent for
authorizing that kind of override. If it's false, other counter-based
profiling features (such as LBR including the associated CPUID bits
if any) will not be exposed.

Change its name from "pmu" to "enable_pmu" as we have temporary
variables with the same name in our code like "struct kvm_pmu *pmu".

Fixes: b1d66dad65dc ("KVM: x86/svm: Add module param to control PMU virtualization")
Suggested-by : Jim Mattson <jmattson@xxxxxxxxxx>
Signed-off-by: Like Xu <likexu@xxxxxxxxxxx>
---
arch/x86/kvm/cpuid.c | 6 +++---
arch/x86/kvm/svm/pmu.c | 2 +-
arch/x86/kvm/svm/svm.c | 8 ++------
arch/x86/kvm/svm/svm.h | 1 -
arch/x86/kvm/vmx/capabilities.h | 4 ++++
arch/x86/kvm/vmx/pmu_intel.c | 2 +-
arch/x86/kvm/x86.c | 5 +++++
arch/x86/kvm/x86.h | 1 +
8 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 0b920e12bb6d..b2ff8bfd8220 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -770,10 +770,10 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
perf_get_x86_pmu_capability(&cap);
/*
- * Only support guest architectural pmu on a host
- * with architectural pmu.
+ * The guest architecture pmu is only supported if the architecture
+ * pmu exists on the host and the module parameters allow it.
*/
- if (!cap.version)
+ if (!cap.version || !enable_pmu)
memset(&cap, 0, sizeof(cap));
eax.split.version_id = min(cap.version, 2);
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index 12d8b301065a..5aa45f13b16d 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -101,7 +101,7 @@ static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr,
{
struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);
- if (!pmu)
+ if (!enable_pmu)
return NULL;
switch (msr) {
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 6cb38044a860..549f73ce5ebc 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -192,10 +192,6 @@ module_param(vgif, int, 0444);
static int lbrv = true;
module_param(lbrv, int, 0444);
-/* enable/disable PMU virtualization */
-bool pmu = true;
-module_param(pmu, bool, 0444);
-
static int tsc_scaling = true;
module_param(tsc_scaling, int, 0444);
@@ -4573,7 +4569,7 @@ static __init void svm_set_cpu_caps(void)
kvm_cpu_cap_set(X86_FEATURE_VIRT_SSBD);
/* AMD PMU PERFCTR_CORE CPUID */
- if (pmu && boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
+ if (enable_pmu && boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
kvm_cpu_cap_set(X86_FEATURE_PERFCTR_CORE);
/* CPUID 0x8000001F (SME/SEV features) */
@@ -4712,7 +4708,7 @@ static __init int svm_hardware_setup(void)
pr_info("LBR virtualization supported\n");
}
- if (!pmu)
+ if (!enable_pmu)
pr_info("PMU virtualization is disabled\n");
svm_set_cpu_caps();
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index daa8ca84afcc..47ef8f4a9358 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -32,7 +32,6 @@
extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
extern bool npt_enabled;
extern bool intercept_smi;
-extern bool pmu;
/*
* Clean bits in VMCB.
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index c8029b7845b6..959b59d13b5a 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -5,6 +5,7 @@
#include <asm/vmx.h>
#include "lapic.h"
+#include "x86.h"
extern bool __read_mostly enable_vpid;
extern bool __read_mostly flexpriority_enabled;
@@ -389,6 +390,9 @@ static inline u64 vmx_get_perf_capabilities(void)
{
u64 perf_cap = 0;
+ if (!enable_pmu)
+ return perf_cap;
+
if (boot_cpu_has(X86_FEATURE_PDCM))
rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_cap);
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index ffccfd9823c0..466d18fc0c5d 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -487,7 +487,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->reserved_bits = 0xffffffff00200000ull;
entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
- if (!entry)
+ if (!entry || !enable_pmu)
return;
eax.full = entry->eax;
edx.full = entry->edx;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c194a8cbd25f..bff2ff8cb35f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -187,6 +187,11 @@ module_param(force_emulation_prefix, bool, S_IRUGO);
int __read_mostly pi_inject_timer = -1;
module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR);
+/* Enable/disable PMU virtualization */
+bool __read_mostly enable_pmu = true;
+EXPORT_SYMBOL_GPL(enable_pmu);
+module_param(enable_pmu, bool, 0444);
+
/*
* Restoring the host value for MSRs that are only consumed when running in
* usermode, e.g. SYSCALL MSRs and TSC_AUX, can be deferred until the CPU
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index da7031e80f23..1ebd5a7594da 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -336,6 +336,7 @@ extern u64 host_xcr0;
extern u64 supported_xcr0;
extern u64 host_xss;
extern u64 supported_xss;
+extern bool enable_pmu;
static inline bool kvm_mpx_supported(void)
{

Queued, thanks.

Paolo