Re: [PATCH v5 07/44] perf: Add APIs to load/put guest mediated PMU context
From: Mi, Dapeng
Date: Fri Aug 08 2025 - 03:35:10 EST
On 8/7/2025 3:56 AM, Sean Christopherson wrote:
> From: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>
>
> Add exported APIs to load/put a guest mediated PMU context. KVM will
> load the guest PMU shortly before VM-Enter, and put the guest PMU shortly
> after VM-Exit.
>
> On the perf side of things, schedule out all exclude_guest events when the
> guest context is loaded, and schedule them back in when the guest context
> is put. I.e. yield the hardware PMU resources to the guest, by way of KVM.
>
> Note, perf is only responsible for managing host context. KVM is
> responsible for loading/storing guest state to/from hardware.
>
> Suggested-by: Sean Christopherson <seanjc@xxxxxxxxxx>
> Signed-off-by: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>
> Signed-off-by: Mingwei Zhang <mizhang@xxxxxxxxxx>
> [sean: shuffle patches around, write changelog]
> Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
> ---
> include/linux/perf_event.h | 2 ++
> kernel/events/core.c | 61 ++++++++++++++++++++++++++++++++++++++
> 2 files changed, 63 insertions(+)
>
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 0958b6d0a61c..42d019d70b42 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -1925,6 +1925,8 @@ extern u64 perf_event_pause(struct perf_event *event, bool reset);
> #ifdef CONFIG_PERF_GUEST_MEDIATED_PMU
> int perf_create_mediated_pmu(void);
> void perf_release_mediated_pmu(void);
> +void perf_load_guest_context(unsigned long data);
> +void perf_put_guest_context(void);
> #endif
>
> #else /* !CONFIG_PERF_EVENTS: */
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 6875b56ddd6b..77398b1ad4c5 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -469,10 +469,19 @@ static cpumask_var_t perf_online_pkg_mask;
> static cpumask_var_t perf_online_sys_mask;
> static struct kmem_cache *perf_event_cache;
>
> +#ifdef CONFIG_PERF_GUEST_MEDIATED_PMU
> +static DEFINE_PER_CPU(bool, guest_ctx_loaded);
> +
> +static __always_inline bool is_guest_mediated_pmu_loaded(void)
> +{
> + return __this_cpu_read(guest_ctx_loaded);
> +}
> +#else
> static __always_inline bool is_guest_mediated_pmu_loaded(void)
> {
> return false;
> }
> +#endif
>
> /*
> * perf event paranoia level:
> @@ -6379,6 +6388,58 @@ void perf_release_mediated_pmu(void)
> atomic_dec(&nr_mediated_pmu_vms);
> }
> EXPORT_SYMBOL_GPL(perf_release_mediated_pmu);
> +
> +/* When loading a guest's mediated PMU, schedule out all exclude_guest events. */
> +void perf_load_guest_context(unsigned long data)
nit: the "data" argument is not used in this patch; we could defer
introducing it until patch 09/44.
> +{
> + struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
> +
> + lockdep_assert_irqs_disabled();
> +
> + guard(perf_ctx_lock)(cpuctx, cpuctx->task_ctx);
> +
> + if (WARN_ON_ONCE(__this_cpu_read(guest_ctx_loaded)))
> + return;
> +
> + perf_ctx_disable(&cpuctx->ctx, EVENT_GUEST);
> + ctx_sched_out(&cpuctx->ctx, NULL, EVENT_GUEST);
> + if (cpuctx->task_ctx) {
> + perf_ctx_disable(cpuctx->task_ctx, EVENT_GUEST);
> + task_ctx_sched_out(cpuctx->task_ctx, NULL, EVENT_GUEST);
> + }
> +
> + perf_ctx_enable(&cpuctx->ctx, EVENT_GUEST);
> + if (cpuctx->task_ctx)
> + perf_ctx_enable(cpuctx->task_ctx, EVENT_GUEST);
> +
> + __this_cpu_write(guest_ctx_loaded, true);
> +}
> +EXPORT_SYMBOL_GPL(perf_load_guest_context);
> +
> +void perf_put_guest_context(void)
> +{
> + struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
> +
> + lockdep_assert_irqs_disabled();
> +
> + guard(perf_ctx_lock)(cpuctx, cpuctx->task_ctx);
> +
> + if (WARN_ON_ONCE(!__this_cpu_read(guest_ctx_loaded)))
> + return;
> +
> + perf_ctx_disable(&cpuctx->ctx, EVENT_GUEST);
> + if (cpuctx->task_ctx)
> + perf_ctx_disable(cpuctx->task_ctx, EVENT_GUEST);
> +
> + perf_event_sched_in(cpuctx, cpuctx->task_ctx, NULL, EVENT_GUEST);
> +
> + if (cpuctx->task_ctx)
> + perf_ctx_enable(cpuctx->task_ctx, EVENT_GUEST);
> + perf_ctx_enable(&cpuctx->ctx, EVENT_GUEST);
> +
> + __this_cpu_write(guest_ctx_loaded, false);
> +}
> +EXPORT_SYMBOL_GPL(perf_put_guest_context);
> #else
> static int mediated_pmu_account_event(struct perf_event *event) { return 0; }
> static void mediated_pmu_unaccount_event(struct perf_event *event) {}