Re: [PATCH] perf/core: Fix endless multiplex timer

From: Peter Zijlstra
Date: Thu Mar 05 2020 - 07:39:02 EST


On Wed, Mar 04, 2020 at 09:20:42AM -0500, Liang, Kan wrote:
>
> NMI watchdog is a pinned event.
> ctx_event_to_rotate() will only pick an event from the flexible_groups.
> So the cpu_ctx_sched_out() in perf_rotate_context() will never be called.

Surely that's fixable; same principle.

---
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 3f1f77de7247..595fb3decd43 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2182,6 +2182,7 @@ __perf_remove_from_context(struct perf_event *event,

if (!ctx->nr_events && ctx->is_active) {
ctx->is_active = 0;
+ ctx->rotate_necessary = 0;
if (ctx->task) {
WARN_ON_ONCE(cpuctx->task_ctx != ctx);
cpuctx->task_ctx = NULL;
@@ -3077,12 +3078,6 @@ static void ctx_sched_out(struct perf_event_context *ctx,
if (!ctx->nr_active || !(is_active & EVENT_ALL))
return;

- /*
- * If we had been multiplexing, no rotations are necessary, now no events
- * are active.
- */
- ctx->rotate_necessary = 0;
-
perf_pmu_disable(ctx->pmu);
if (is_active & EVENT_PINNED) {
list_for_each_entry_safe(event, tmp, &ctx->pinned_active, active_list)
@@ -3092,6 +3087,13 @@ static void ctx_sched_out(struct perf_event_context *ctx,
if (is_active & EVENT_FLEXIBLE) {
list_for_each_entry_safe(event, tmp, &ctx->flexible_active, active_list)
group_sched_out(event, cpuctx, ctx);
+
+ /*
+ * Since we cleared EVENT_FLEXIBLE, also clear
+ * rotate_necessary, it will be reset by
+ * ctx_flexible_sched_in() when needed.
+ */
+ ctx->rotate_necessary = 0;
}
perf_pmu_enable(ctx->pmu);
}
@@ -3841,6 +3843,12 @@ ctx_event_to_rotate(struct perf_event_context *ctx)
typeof(*event), group_node);
}

+ /*
+ * Unconditionally clear rotate_necessary; if ctx_flexible_sched_in()
+ * finds there are unschedulable events, it will set it again.
+ */
+ ctx->rotate_necessary = 0;
+
return event;
}