Re: [PATCH v11 10/11] KVM: x86/pmu: Check guest LBR availability in case host reclaims them

From: Peter Zijlstra
Date: Tue May 19 2020 - 07:16:34 EST


On Thu, May 14, 2020 at 04:30:53PM +0800, Like Xu wrote:

> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
> index ea4faae56473..db185dca903d 100644
> --- a/arch/x86/kvm/vmx/pmu_intel.c
> +++ b/arch/x86/kvm/vmx/pmu_intel.c
> @@ -646,6 +646,43 @@ static void intel_pmu_lbr_cleanup(struct kvm_vcpu *vcpu)
> 	intel_pmu_free_lbr_event(vcpu);
> }
>
> +static bool intel_pmu_lbr_is_available(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> +
> +	if (!pmu->lbr_event)
> +		return false;
> +
> +	if (event_is_oncpu(pmu->lbr_event)) {
> +		intel_pmu_intercept_lbr_msrs(vcpu, false);
> +	} else {
> +		intel_pmu_intercept_lbr_msrs(vcpu, true);
> +		return false;
> +	}
> +
> +	return true;
> +}

This is unreadable gunk; why does a predicate called *_is_available() flip
MSR interception as a side effect?
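
Something like the below (an untested sketch, reusing event_is_oncpu() and
intel_pmu_intercept_lbr_msrs() from this patch) would at least keep the test
side-effect free and make the interception toggle visible at the call site:

	/*
	 * Untested sketch: the availability test stays a pure predicate;
	 * the caller decides what to do about interception.
	 */
	static bool intel_pmu_lbr_is_available(struct kvm_vcpu *vcpu)
	{
		struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);

		return pmu->lbr_event && event_is_oncpu(pmu->lbr_event);
	}

	static void intel_pmu_update_lbr_intercept(struct kvm_vcpu *vcpu)
	{
		bool available = intel_pmu_lbr_is_available(vcpu);

		/* Intercept the LBR MSRs iff the event lost the hardware. */
		intel_pmu_intercept_lbr_msrs(vcpu, !available);
	}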

> +/*
> + * Higher-priority host perf events (e.g. CPU-pinned) can reclaim the
> + * PMU resources (e.g. the LBR) that were assigned to the guest. This
> + * is usually done via IPI calls (see perf_install_in_context for
> + * details).
> + *
> + * Before entering non-root mode (with IRQs disabled here), double-check
> + * that the PMU features exposed to the guest have not been reclaimed by
> + * higher-priority host events. If they have, disallow the vCPU's access
> + * to the reclaimed features.
> + */
> +static void intel_pmu_availability_check(struct kvm_vcpu *vcpu)
> +{
> +	lockdep_assert_irqs_disabled();
> +
> +	if (lbr_is_enabled(vcpu) && !intel_pmu_lbr_is_available(vcpu) &&
> +	    (vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR))
> +		pr_warn_ratelimited("kvm: vcpu-%d: LBR is temporarily unavailable.\n",
> +			vcpu->vcpu_id);

More unreadable nonsense; when an event goes into ERROR state, that is a
permanent failure, it will not come back, so "temporarily unavailable" is
simply wrong.
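
If you want to detect that case, look at the event state and treat it as
final; a rough, untested sketch (intel_pmu_free_lbr_event() as in your
earlier patch):

	/*
	 * Untested sketch: PERF_EVENT_STATE_ERROR is terminal, so once
	 * the LBR event lands there, free it and fall back to
	 * intercepting the LBR MSRs, instead of warning that the LBR is
	 * "temporarily" unavailable.
	 */
	static void intel_pmu_handle_lbr_loss(struct kvm_vcpu *vcpu)
	{
		struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);

		if (pmu->lbr_event &&
		    pmu->lbr_event->state == PERF_EVENT_STATE_ERROR) {
			intel_pmu_free_lbr_event(vcpu);
			intel_pmu_intercept_lbr_msrs(vcpu, true);
		}
	}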

> +}
> +
> struct kvm_pmu_ops intel_pmu_ops = {
> 	.find_arch_event = intel_find_arch_event,
> 	.find_fixed_event = intel_find_fixed_event,
> @@ -662,4 +699,5 @@ struct kvm_pmu_ops intel_pmu_ops = {
> 	.reset = intel_pmu_reset,
> 	.deliver_pmi = intel_pmu_deliver_pmi,
> 	.lbr_cleanup = intel_pmu_lbr_cleanup,
> +	.availability_check = intel_pmu_availability_check,
> };
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 9969d663826a..80d036c5f64a 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -6696,8 +6696,10 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
>
> 	pt_guest_enter(vmx);
>
> -	if (vcpu_to_pmu(vcpu)->version)
> +	if (vcpu_to_pmu(vcpu)->version) {
> 		atomic_switch_perf_msrs(vmx);
> +		kvm_x86_ops.pmu_ops->availability_check(vcpu);
> +	}

AFAICT you just called out to the kvm_pmu crud from
atomic_switch_perf_msrs(); why do another indirect call right next to it?
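
IOW, something along these lines (untested sketch; it assumes the
availability check is made reachable from vmx.c, since in this patch
intel_pmu_availability_check() is static in pmu_intel.c) keeps a single
entry point into the pmu code on this path:

	static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
	{
		/* ... existing perf_guest_get_msrs() /
		 *     add_atomic_switch_msr() logic ... */

		/* Do the LBR availability check here, not in the caller. */
		intel_pmu_availability_check(&vmx->vcpu);
	}

so that vmx_vcpu_run() stays as it was:

	if (vcpu_to_pmu(vcpu)->version)
		atomic_switch_perf_msrs(vmx);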