[PATCH v2 07/12] KVM: x86/pmu: Use PERF_TYPE_RAW to merge reprogram_{gp, fixed}counter()

From: Like Xu
Date: Wed Mar 02 2022 - 06:14:25 EST


From: Like Xu <likexu@xxxxxxxxxxx>

The code sketch for reprogram_{gp, fixed}_counter() is similar, while the
fixed counter using the PERF_TYPE_HARDWAR type and the gp being
able to use either PERF_TYPE_HARDWAR or PERF_TYPE_RAW type
depending on the pmc->eventsel value.

After 'commit 761875634a5e ("KVM: x86/pmu: Setup pmc->eventsel
for fixed PMCs")', the pmc->eventsel of the fixed counter will also have
been setup with the same semantic value and will not be changed during
the guest runtime. But essentially, "the HARDWARE is just a convenience
wrapper over RAW IIRC", quoated from Peterz. So it could be pretty safe
to use the PERF_TYPE_RAW type only to program both gp and fixed
counters naturally in the reprogram_counter().

To make the gp and fixed counters more semantically symmetrical,
the selection of EVENTSEL_{USER, OS, INT} bits is temporarily translated
via fixed_ctr_ctrl before the pmc_reprogram_counter() call.

Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Suggested-by: Jim Mattson <jmattson@xxxxxxxxxx>
Signed-off-by: Like Xu <likexu@xxxxxxxxxxx>
---
arch/x86/kvm/pmu.c | 128 +++++++++++++----------------------
arch/x86/kvm/vmx/pmu_intel.c | 2 +-
2 files changed, 47 insertions(+), 83 deletions(-)

diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 5299488b002c..00e1660c10ca 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -215,85 +215,60 @@ static bool check_pmu_event_filter(struct kvm_pmc *pmc)
return allow_event;
}

-static void reprogram_gp_counter(struct kvm_pmc *pmc)
-{
- u64 config;
- u32 type = PERF_TYPE_RAW;
- u64 eventsel = pmc->eventsel;
-
- if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
- printk_once("kvm pmu: pin control bit is ignored\n");
-
- pmc_pause_counter(pmc);
-
- if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_is_enabled(pmc))
- return;
-
- if (!check_pmu_event_filter(pmc))
- return;
-
- if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
- ARCH_PERFMON_EVENTSEL_INV |
- ARCH_PERFMON_EVENTSEL_CMASK |
- HSW_IN_TX |
- HSW_IN_TX_CHECKPOINTED))) {
- config = kvm_x86_ops.pmu_ops->pmc_perf_hw_id(pmc);
- if (config != PERF_COUNT_HW_MAX)
- type = PERF_TYPE_HARDWARE;
- }
-
- if (type == PERF_TYPE_RAW)
- config = eventsel & AMD64_RAW_EVENT_MASK;
-
- if (pmc->current_config == eventsel && pmc_resume_counter(pmc))
- return;
-
- pmc_release_perf_event(pmc);
-
- pmc->current_config = eventsel;
- pmc_reprogram_counter(pmc, type, config,
- !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
- !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
- eventsel & ARCH_PERFMON_EVENTSEL_INT,
- (eventsel & HSW_IN_TX),
- (eventsel & HSW_IN_TX_CHECKPOINTED));
-}
-
-static void reprogram_fixed_counter(struct kvm_pmc *pmc)
+static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
{
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
- int idx = pmc->idx - INTEL_PMC_IDX_FIXED;
- u8 ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl, idx);
- unsigned en_field = ctrl & 0x3;
- bool pmi = ctrl & 0x8;

- pmc_pause_counter(pmc);
+ if (pmc_is_fixed(pmc))
+ return fixed_ctrl_field(pmu->fixed_ctr_ctrl,
+ pmc->idx - INTEL_PMC_IDX_FIXED) & 0x3;

- if (!en_field || !pmc_is_enabled(pmc))
- return;
-
- if (!check_pmu_event_filter(pmc))
- return;
-
- if (pmc->current_config == (u64)ctrl && pmc_resume_counter(pmc))
- return;
-
- pmc_release_perf_event(pmc);
-
- pmc->current_config = (u64)ctrl;
- pmc_reprogram_counter(pmc, PERF_TYPE_HARDWARE,
- kvm_x86_ops.pmu_ops->pmc_perf_hw_id(pmc),
- !(en_field & 0x2), /* exclude user */
- !(en_field & 0x1), /* exclude kernel */
- pmi, false, false);
+ return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE;
}

void reprogram_counter(struct kvm_pmc *pmc)
{
- if (pmc_is_gp(pmc))
- reprogram_gp_counter(pmc);
- else
- reprogram_fixed_counter(pmc);
+ struct kvm_pmu *pmu = pmc_to_pmu(pmc);
+ u64 eventsel = pmc->eventsel;
+ u64 new_config = eventsel;
+ u8 fixed_ctr_ctrl;
+
+ pmc_pause_counter(pmc);
+
+ if (!pmc_speculative_in_use(pmc) || !pmc_is_enabled(pmc))
+ return;
+
+ if (!check_pmu_event_filter(pmc))
+ return;
+
+ if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
+ printk_once("kvm pmu: pin control bit is ignored\n");
+
+ if (pmc_is_fixed(pmc)) {
+ fixed_ctr_ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl,
+ pmc->idx - INTEL_PMC_IDX_FIXED);
+ if (fixed_ctr_ctrl & 0x1)
+ eventsel |= ARCH_PERFMON_EVENTSEL_OS;
+ if (fixed_ctr_ctrl & 0x2)
+ eventsel |= ARCH_PERFMON_EVENTSEL_USR;
+ if (fixed_ctr_ctrl & 0x8)
+ eventsel |= ARCH_PERFMON_EVENTSEL_INT;
+ new_config = (u64)fixed_ctr_ctrl;
+ }
+
+ if (pmc->current_config == new_config && pmc_resume_counter(pmc))
+ return;
+
+ pmc_release_perf_event(pmc);
+
+ pmc->current_config = new_config;
+ pmc_reprogram_counter(pmc, PERF_TYPE_RAW,
+ (eventsel & AMD64_RAW_EVENT_MASK),
+ !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
+ !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
+ eventsel & ARCH_PERFMON_EVENTSEL_INT,
+ (eventsel & HSW_IN_TX),
+ (eventsel & HSW_IN_TX_CHECKPOINTED));
}
EXPORT_SYMBOL_GPL(reprogram_counter);

@@ -451,17 +426,6 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu)
kvm_pmu_refresh(vcpu);
}

-static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
-{
- struct kvm_pmu *pmu = pmc_to_pmu(pmc);
-
- if (pmc_is_fixed(pmc))
- return fixed_ctrl_field(pmu->fixed_ctr_ctrl,
- pmc->idx - INTEL_PMC_IDX_FIXED) & 0x3;
-
- return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE;
-}
-
/* Release perf_events for vPMCs that have been unused for a full time slice. */
void kvm_pmu_cleanup(struct kvm_vcpu *vcpu)
{
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 19b78a9d9d47..d823fbe4e155 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -492,7 +492,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->reserved_bits = 0xffffffff00200000ull;

entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
- if (!entry || !vcpu->kvm->arch.enable_pmu)
+ if (!entry || !vcpu->kvm->arch.enable_pmu || !boot_cpu_has(X86_FEATURE_ARCH_PERFMON))
return;
eax.full = entry->eax;
edx.full = entry->edx;
--
2.35.1