Re: [tip:timers/core] hrtimer: Allow concurrent hrtimer_start() for self restarting timers

From: Peter Zijlstra
Date: Thu May 14 2015 - 06:23:39 EST


On Wed, May 13, 2015 at 07:09:40PM -0400, Sasha Levin wrote:
> On 05/13/2015 09:43 AM, Peter Zijlstra wrote:

> > So I suppose the below (compile tested) patch should fix things, but
> > seeing how I've been up since 4am I might just have missed something
> > obvious :-)
>
> I know exactly how you feel...

How about I add a raw_spin_lock_init() for the new lock I added ;-)

This one seems to boot and do simple perf things with lockdep enabled.

---
include/linux/perf_event.h | 4 ++++
kernel/events/core.c | 29 ++++++++++++++++-------------
kernel/sched/core.c | 12 ------------
kernel/sched/fair.c | 17 +++++++++++++----
kernel/sched/rt.c | 8 +++++++-
kernel/sched/sched.h | 5 ++---
6 files changed, 42 insertions(+), 33 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index e86f85abeda7..1f5607b220e4 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -566,8 +566,12 @@ struct perf_cpu_context {
struct perf_event_context *task_ctx;
int active_oncpu;
int exclusive;
+
+ raw_spinlock_t hrtimer_lock;
struct hrtimer hrtimer;
ktime_t hrtimer_interval;
+ unsigned int hrtimer_active;
+
struct pmu *unique_pmu;
struct perf_cgroup *cgrp;
};
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 84231a146dd5..1c6c2826af1e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -752,24 +752,21 @@ perf_cgroup_mark_enabled(struct perf_event *event,
static enum hrtimer_restart perf_mux_hrtimer_handler(struct hrtimer *hr)
{
struct perf_cpu_context *cpuctx;
- enum hrtimer_restart ret = HRTIMER_NORESTART;
int rotations = 0;

WARN_ON(!irqs_disabled());

cpuctx = container_of(hr, struct perf_cpu_context, hrtimer);
-
rotations = perf_rotate_context(cpuctx);

- /*
- * arm timer if needed
- */
- if (rotations) {
+ raw_spin_lock(&cpuctx->hrtimer_lock);
+ if (rotations)
hrtimer_forward_now(hr, cpuctx->hrtimer_interval);
- ret = HRTIMER_RESTART;
- }
+ else
+ cpuctx->hrtimer_active = 0;
+ raw_spin_unlock(&cpuctx->hrtimer_lock);

- return ret;
+ return rotations ? HRTIMER_RESTART : HRTIMER_NORESTART;
}

static void __perf_mux_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu)
@@ -792,7 +789,8 @@ static void __perf_mux_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu)

cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * interval);

- hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
+ raw_spin_lock_init(&cpuctx->hrtimer_lock);
+ hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
timer->function = perf_mux_hrtimer_handler;
}

@@ -800,15 +798,20 @@ static int perf_mux_hrtimer_restart(struct perf_cpu_context *cpuctx)
{
struct hrtimer *timer = &cpuctx->hrtimer;
struct pmu *pmu = cpuctx->ctx.pmu;
+ unsigned long flags;

/* not for SW PMU */
if (pmu->task_ctx_nr == perf_sw_context)
return 0;

- if (hrtimer_is_queued(timer))
- return 0;
+ raw_spin_lock_irqsave(&cpuctx->hrtimer_lock, flags);
+ if (!cpuctx->hrtimer_active) {
+ cpuctx->hrtimer_active = 1;
+ hrtimer_forward_now(timer, cpuctx->hrtimer_interval);
+ hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED);
+ }
+ raw_spin_unlock_irqrestore(&cpuctx->hrtimer_lock, flags);

- hrtimer_start(timer, cpuctx->hrtimer_interval, HRTIMER_MODE_REL_PINNED);
return 0;
}

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 613b61e381ca..bd9182aa74c0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -90,18 +90,6 @@
#define CREATE_TRACE_POINTS
#include <trace/events/sched.h>

-void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period)
-{
- /*
- * Do not forward the expiration time of active timers;
- * we do not want to loose an overrun.
- */
- if (!hrtimer_active(period_timer))
- hrtimer_forward_now(period_timer, period);
-
- hrtimer_start_expires(period_timer, HRTIMER_MODE_ABS_PINNED);
-}
-
DEFINE_MUTEX(sched_domains_mutex);
DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 8c1510abeefa..2c08783ee95c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3883,8 +3883,9 @@ static void start_cfs_slack_bandwidth(struct cfs_bandwidth *cfs_b)
if (runtime_refresh_within(cfs_b, min_left))
return;

- start_bandwidth_timer(&cfs_b->slack_timer,
- ns_to_ktime(cfs_bandwidth_slack_period));
+ hrtimer_start(&cfs_b->slack_timer,
+ ns_to_ktime(cfs_bandwidth_slack_period),
+ HRTIMER_MODE_REL);
}

/* we know any runtime found here is valid as update_curr() precedes return */
@@ -4025,6 +4026,8 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)

idle = do_sched_cfs_period_timer(cfs_b, overrun);
}
+ if (idle)
+ cfs_b->period_active = 0;
raw_spin_unlock(&cfs_b->lock);

return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
@@ -4038,7 +4041,7 @@ void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
cfs_b->period = ns_to_ktime(default_cfs_period());

INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq);
- hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
cfs_b->period_timer.function = sched_cfs_period_timer;
hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
cfs_b->slack_timer.function = sched_cfs_slack_timer;
@@ -4052,7 +4055,13 @@ static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)

void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
{
- start_bandwidth_timer(&cfs_b->period_timer, cfs_b->period);
+ lockdep_assert_held(&cfs_b->lock);
+
+ if (!cfs_b->period_active) {
+ cfs_b->period_active = 1;
+ hrtimer_forward_now(&cfs_b->period_timer, cfs_b->period);
+ hrtimer_start_expires(&cfs_b->period_timer, HRTIMER_MODE_ABS_PINNED);
+ }
}

static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 8781a38a636e..7d7093c51f8d 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -31,6 +31,8 @@ static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
idle = do_sched_rt_period_timer(rt_b, overrun);
raw_spin_lock(&rt_b->rt_runtime_lock);
}
+ if (idle)
+ rt_b->rt_period_active = 0;
raw_spin_unlock(&rt_b->rt_runtime_lock);

return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
@@ -54,7 +56,11 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
return;

raw_spin_lock(&rt_b->rt_runtime_lock);
- start_bandwidth_timer(&rt_b->rt_period_timer, rt_b->rt_period);
+ if (!rt_b->rt_period_active) {
+ rt_b->rt_period_active = 1;
+ hrtimer_forward_now(&rt_b->rt_period_timer, rt_b->rt_period);
+ hrtimer_start_expires(&rt_b->rt_period_timer, HRTIMER_MODE_ABS_PINNED);
+ }
raw_spin_unlock(&rt_b->rt_runtime_lock);
}

diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 460f98648afd..f10a445910c9 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -137,6 +137,7 @@ struct rt_bandwidth {
ktime_t rt_period;
u64 rt_runtime;
struct hrtimer rt_period_timer;
+ unsigned int rt_period_active;
};

void __dl_clear_params(struct task_struct *p);
@@ -221,7 +222,7 @@ struct cfs_bandwidth {
s64 hierarchical_quota;
u64 runtime_expires;

- int idle;
+ int idle, period_active;
struct hrtimer period_timer, slack_timer;
struct list_head throttled_cfs_rq;

@@ -1410,8 +1411,6 @@ static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) { }
static inline void sched_avg_update(struct rq *rq) { }
#endif

-extern void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period);
-
/*
* __task_rq_lock - lock the rq @p resides on.
*/
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/