Re: [PATCH RFC] perf, core: disable pmu while context rotation only if needed

From: Peter Zijlstra
Date: Tue Nov 15 2011 - 07:07:49 EST


On Tue, 2011-11-15 at 13:34 +0200, Gleb Natapov wrote:
>
> Currently the pmu is disabled and re-enabled on each timer interrupt, even
> when no rotation or frequency adjustment is needed. On Intel CPUs this
> results in two writes to the PERF_GLOBAL_CTRL MSR per tick. On bare metal
> this does not cause a significant slowdown, but when running perf in a
> virtual machine it leads to a 20% slowdown on my machine.


I detest asymmetric locking like that; does something like the below
also work for you?

---
include/linux/perf_event.h | 1 +
kernel/events/core.c | 30 +++++++++++++++++++++++++-----
2 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 1e9ebe5..92773aa 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -889,6 +889,7 @@ struct perf_event_context {
int nr_active;
int is_active;
int nr_stat;
+ int nr_freq;
int rotate_disable;
atomic_t refcount;
struct task_struct *task;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 7f693e9..d8f2f38 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1127,6 +1127,8 @@ event_sched_out(struct perf_event *event,
if (!is_software_event(event))
cpuctx->active_oncpu--;
ctx->nr_active--;
+ if (event->attr.freq && event->attr.sample_freq)
+ ctx->nr_freq--;
if (event->attr.exclusive || !cpuctx->active_oncpu)
cpuctx->exclusive = 0;
}
@@ -1403,6 +1405,8 @@ event_sched_in(struct perf_event *event,
if (!is_software_event(event))
cpuctx->active_oncpu++;
ctx->nr_active++;
+ if (event->attr.freq && event->attr.sample_freq)
+ ctx->nr_freq++;

if (event->attr.exclusive)
cpuctx->exclusive = 1;
@@ -2324,6 +2328,9 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
u64 interrupts, now;
s64 delta;

+ if (!ctx->nr_freq)
+ return;
+
list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
if (event->state != PERF_EVENT_STATE_ACTIVE)
continue;
@@ -2379,12 +2386,14 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx)
{
u64 interval = (u64)cpuctx->jiffies_interval * TICK_NSEC;
struct perf_event_context *ctx = NULL;
- int rotate = 0, remove = 1;
+ int rotate = 0, remove = 1, freq = 0;

if (cpuctx->ctx.nr_events) {
remove = 0;
if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
rotate = 1;
+ if (cpuctx->ctx.nr_freq)
+ freq = 1;
}

ctx = cpuctx->task_ctx;
@@ -2392,16 +2401,26 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx)
remove = 0;
if (ctx->nr_events != ctx->nr_active)
rotate = 1;
+ if (ctx->nr_freq)
+ freq = 1;
}

+ if (!rotate && !freq)
+ goto done;
+
perf_ctx_lock(cpuctx, cpuctx->task_ctx);
perf_pmu_disable(cpuctx->ctx.pmu);
+
+ if (!freq)
+ goto rotate;
+
perf_ctx_adjust_freq(&cpuctx->ctx, interval);
if (ctx)
perf_ctx_adjust_freq(ctx, interval);

+rotate:
if (!rotate)
- goto done;
+ goto unlock;

cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
if (ctx)
@@ -2413,12 +2432,13 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx)

perf_event_sched_in(cpuctx, ctx, current);

+unlock:
+ perf_pmu_enable(cpuctx->ctx.pmu);
+ perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
+
done:
if (remove)
list_del_init(&cpuctx->rotation_list);
-
- perf_pmu_enable(cpuctx->ctx.pmu);
- perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
}

void perf_event_task_tick(void)

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/