[PATCH v2] KVM: x86/pmu: Fix emulation of Intel counters' bit width

From: Like Xu
Date: Wed Mar 22 2023 - 05:31:53 EST


From: Like Xu <likexu@xxxxxxxxxxx>

Per the Intel SDM, the bit width of a PMU counter is specified via CPUID
only if the vCPU has FW_WRITE[bit 13] set in IA32_PERF_CAPABILITIES.
When the FW_WRITE bit is not set, only EAX is valid and accesses to
out-of-bounds bits do not generate #GP. Conversely, when this bit is
set, accesses to out-of-bounds bits also generate #GP on the fixed
counters. The vPMU currently does not support emulating bit widths lower
than 32 bits or higher than the host's capability.

Signed-off-by: Like Xu <likexu@xxxxxxxxxxx>
---
Previous:
https://lore.kernel.org/kvm/20230316113312.54714-1-likexu@xxxxxxxxxxx/

V1 -> V2 Changelog:
- Apply #GP rule to fixed counters when guest has FW_WRITE;
- Apply signed rule to fixed counters when guest doesn't have FW_WRITE;
- Counters' bit width set by cpuid cannot be less than 32 bits;

arch/x86/kvm/vmx/pmu_intel.c | 10 ++++++++++
1 file changed, 10 insertions(+)

diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index e8a3be0b9df9..d38b820d6b9e 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -470,6 +470,12 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
pmc_update_sample_period(pmc);
return 0;
} else if ((pmc = get_fixed_pmc(pmu, msr))) {
+ if (fw_writes_is_enabled(vcpu)) {
+ if (data & ~pmu->counter_bitmask[KVM_PMC_FIXED])
+ return 1;
+ } else if (!msr_info->host_initiated) {
+ data = (s64)(s32)data;
+ }
pmc->counter += data - pmc_read_counter(pmc);
pmc_update_sample_period(pmc);
return 0;
@@ -516,6 +522,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
union cpuid10_edx edx;
u64 perf_capabilities;
u64 counter_mask;
+ bool fw_wr = fw_writes_is_enabled(vcpu);
int i;

pmu->nr_arch_gp_counters = 0;
@@ -543,6 +550,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)

pmu->nr_arch_gp_counters = min_t(int, eax.split.num_counters,
kvm_pmu_cap.num_counters_gp);
+ eax.split.bit_width = fw_wr ? max_t(int, 32, eax.split.bit_width) : 32;
eax.split.bit_width = min_t(int, eax.split.bit_width,
kvm_pmu_cap.bit_width_gp);
pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << eax.split.bit_width) - 1;
@@ -558,6 +566,8 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
min3(ARRAY_SIZE(fixed_pmc_events),
(size_t) edx.split.num_counters_fixed,
(size_t)kvm_pmu_cap.num_counters_fixed);
+ edx.split.bit_width_fixed = fw_wr ?
+ max_t(int, 32, edx.split.bit_width_fixed) : 32;
edx.split.bit_width_fixed = min_t(int, edx.split.bit_width_fixed,
kvm_pmu_cap.bit_width_fixed);
pmu->counter_bitmask[KVM_PMC_FIXED] =

base-commit: d8708b80fa0e6e21bc0c9e7276ad0bccef73b6e7
--
2.40.0