[PATCH v3 08/11] KVM: x86/pmu: Use PERF_TYPE_RAW to merge reprogram_{gp,fixed}counter()

From: Like Xu
Date: Mon Apr 11 2022 - 05:36:45 EST


From: Like Xu <likexu@xxxxxxxxxxx>

The code sketch for reprogram_{gp, fixed}_counter() is similar, while the
fixed counter using the PERF_TYPE_HARDWAR type and the gp being
able to use either PERF_TYPE_HARDWAR or PERF_TYPE_RAW type
depending on the pmc->eventsel value.

After 'commit 761875634a5e ("KVM: x86/pmu: Setup pmc->eventsel
for fixed PMCs")', the pmc->eventsel of the fixed counter will also have
been setup with the same semantic value and will not be changed during
the guest runtime.

The original story of using the PERF_TYPE_HARDWARE type is to emulate
guest architecture PMU on a host without architecture PMU (the Pentium 4),
for which the guest vPMC needs to be reprogrammed using the kernel
generic perf_hw_id. But essentially, "the HARDWARE is just a convenience
wrapper over RAW IIRC", quoated from Peterz. So it could be pretty safe
to use the PERF_TYPE_RAW type only in practice to program both gp and
fixed counters naturally in the reprogram_counter().

To make the gp and fixed counters more semantically symmetrical,
the selection of EVENTSEL_{USER, OS, INT} bits is temporarily translated
via fixed_ctr_ctrl before the pmc_reprogram_counter() call.

Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Suggested-by: Jim Mattson <jmattson@xxxxxxxxxx>
Signed-off-by: Like Xu <likexu@xxxxxxxxxxx>
---
arch/x86/kvm/pmu.c | 125 ++++++++++++-----------------------
arch/x86/kvm/vmx/pmu_intel.c | 3 +-
2 files changed, 46 insertions(+), 82 deletions(-)

diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 598b19223965..58960844f49f 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -240,84 +240,58 @@ static bool check_pmu_event_filter(struct kvm_pmc *pmc)
return allow_event;
}

-static void reprogram_gp_counter(struct kvm_pmc *pmc)
-{
- u64 config;
- u32 type = PERF_TYPE_RAW;
- struct kvm_pmu *pmu = pmc_to_pmu(pmc);
- u64 eventsel = pmc->eventsel;
-
- if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
- printk_once("kvm pmu: pin control bit is ignored\n");
-
- pmc_pause_counter(pmc);
-
- if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_is_enabled(pmc))
- return;
-
- if (!check_pmu_event_filter(pmc))
- return;
-
- if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
- ARCH_PERFMON_EVENTSEL_INV |
- ARCH_PERFMON_EVENTSEL_CMASK |
- HSW_IN_TX |
- HSW_IN_TX_CHECKPOINTED))) {
- config = static_call(kvm_x86_pmu_pmc_perf_hw_id)(pmc);
- if (config != PERF_COUNT_HW_MAX)
- type = PERF_TYPE_HARDWARE;
- }
-
- if (type == PERF_TYPE_RAW)
- config = eventsel & pmu->raw_event_mask;
-
- if (pmc->current_config == eventsel && pmc_resume_counter(pmc))
- return;
-
- pmc_release_perf_event(pmc);
-
- pmc->current_config = eventsel;
- pmc_reprogram_counter(pmc, type, config,
- !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
- !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
- eventsel & ARCH_PERFMON_EVENTSEL_INT);
-}
-
-static void reprogram_fixed_counter(struct kvm_pmc *pmc)
+static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
{
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
- int idx = pmc->idx - INTEL_PMC_IDX_FIXED;
- u8 ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl, idx);
- unsigned en_field = ctrl & 0x3;
- bool pmi = ctrl & 0x8;

- pmc_pause_counter(pmc);
+ if (pmc_is_fixed(pmc))
+ return fixed_ctrl_field(pmu->fixed_ctr_ctrl,
+ pmc->idx - INTEL_PMC_IDX_FIXED) & 0x3;

- if (!en_field || !pmc_is_enabled(pmc))
- return;
-
- if (!check_pmu_event_filter(pmc))
- return;
-
- if (pmc->current_config == (u64)ctrl && pmc_resume_counter(pmc))
- return;
-
- pmc_release_perf_event(pmc);
-
- pmc->current_config = (u64)ctrl;
- pmc_reprogram_counter(pmc, PERF_TYPE_HARDWARE,
- static_call(kvm_x86_pmu_pmc_perf_hw_id)(pmc),
- !(en_field & 0x2), /* exclude user */
- !(en_field & 0x1), /* exclude kernel */
- pmi);
+ return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE;
}

void reprogram_counter(struct kvm_pmc *pmc)
{
- if (pmc_is_gp(pmc))
- reprogram_gp_counter(pmc);
- else
- reprogram_fixed_counter(pmc);
+ struct kvm_pmu *pmu = pmc_to_pmu(pmc);
+ u64 eventsel = pmc->eventsel;
+ u64 new_config = eventsel;
+ u8 fixed_ctr_ctrl;
+
+ pmc_pause_counter(pmc);
+
+ if (!pmc_speculative_in_use(pmc) || !pmc_is_enabled(pmc))
+ return;
+
+ if (!check_pmu_event_filter(pmc))
+ return;
+
+ if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
+ printk_once("kvm pmu: pin control bit is ignored\n");
+
+ if (pmc_is_fixed(pmc)) {
+ fixed_ctr_ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl,
+ pmc->idx - INTEL_PMC_IDX_FIXED);
+ if (fixed_ctr_ctrl & 0x1)
+ eventsel |= ARCH_PERFMON_EVENTSEL_OS;
+ if (fixed_ctr_ctrl & 0x2)
+ eventsel |= ARCH_PERFMON_EVENTSEL_USR;
+ if (fixed_ctr_ctrl & 0x8)
+ eventsel |= ARCH_PERFMON_EVENTSEL_INT;
+ new_config = (u64)fixed_ctr_ctrl;
+ }
+
+ if (pmc->current_config == new_config && pmc_resume_counter(pmc))
+ return;
+
+ pmc_release_perf_event(pmc);
+
+ pmc->current_config = new_config;
+ pmc_reprogram_counter(pmc, PERF_TYPE_RAW,
+ (eventsel & pmu->raw_event_mask),
+ !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
+ !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
+ eventsel & ARCH_PERFMON_EVENTSEL_INT);
}
EXPORT_SYMBOL_GPL(reprogram_counter);

@@ -474,17 +448,6 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu)
kvm_pmu_refresh(vcpu);
}

-static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
-{
- struct kvm_pmu *pmu = pmc_to_pmu(pmc);
-
- if (pmc_is_fixed(pmc))
- return fixed_ctrl_field(pmu->fixed_ctr_ctrl,
- pmc->idx - INTEL_PMC_IDX_FIXED) & 0x3;
-
- return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE;
-}
-
/* Release perf_events for vPMCs that have been unused for a full time slice. */
void kvm_pmu_cleanup(struct kvm_vcpu *vcpu)
{
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 1f910b349978..11eb186929bc 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -498,7 +498,8 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->raw_event_mask = X86_RAW_EVENT_MASK;

entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
- if (!entry || !vcpu->kvm->arch.enable_pmu)
+ if (!entry || !vcpu->kvm->arch.enable_pmu ||
+ !boot_cpu_has(X86_FEATURE_ARCH_PERFMON))
return;
eax.full = entry->eax;
edx.full = entry->edx;
--
2.35.1