[PATCH 2/2] sched/rt: Up the root domain ref count when passing it around via IPIs

From: Steven Rostedt
Date: Tue Jan 23 2018 - 20:56:31 EST


From: "Steven Rostedt (VMware)" <rostedt@xxxxxxxxxxx>

When issuing an IPI RT push, where an IPI is sent to each CPU that has more
than one RT task scheduled on it, the logic references the root domain's
rto_mask, which contains all the CPUs within the root domain that have more
than one RT task in the runnable state. The problem is that after the IPIs are
initiated, the rq->lock is released. This means that the root domain associated
with the run queue could be freed while the IPIs are still going around.

Add sched_get_rd() and sched_put_rd(), which increment and decrement the
root domain's ref count respectively. This way, when initiating the IPIs, the
scheduler ups the root domain's ref count before releasing the rq->lock,
ensuring that the root domain does not go away until the IPI round is
complete.

Link: http://lkml.kernel.org/r/CAEU1=PkiHO35Dzna8EQqNSKW1fr1y1zRQ5y66X117MG06sQtNA@xxxxxxxxxxxxxx
Cc: stable@xxxxxxxxxxxxxxx
Fixes: 4bdced5c9a292 ("sched/rt: Simplify the IPI based RT balancing logic")
Reported-by: Pavan Kondeti <pkondeti@xxxxxxxxxxxxxx>
Signed-off-by: Steven Rostedt (VMware) <rostedt@xxxxxxxxxxx>
---
kernel/sched/rt.c | 9 +++++++--
kernel/sched/sched.h | 2 ++
kernel/sched/topology.c | 13 +++++++++++++
3 files changed, 22 insertions(+), 2 deletions(-)
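
For reviewers, the get/put pairing in short (a condensed sketch of the hunks
below, using only the names from this patch; not a standalone compilable
unit):

	/* tell_cpu_to_push(), with rq->lock held: */
	if (cpu >= 0) {
		sched_get_rd(rq->rd);	/* pin rd for the IPI round */
		irq_work_queue_on(&rq->rd->rto_push_work, cpu);
	}

	/* rto_push_irq_work_func(), hardirq context on the pushed CPU: */
	if (cpu < 0) {
		sched_put_rd(rd);	/* round done; rd may be freed via RCU */
		return;
	}
	irq_work_queue_on(&rd->rto_push_work, cpu);	/* next overloaded CPU */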

diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 274b55e4a560..3401f588c916 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1990,8 +1990,11 @@ static void tell_cpu_to_push(struct rq *rq)

rto_start_unlock(&rq->rd->rto_loop_start);

- if (cpu >= 0)
+ if (cpu >= 0) {
+ /* Make sure the rd does not get freed while pushing */
+ sched_get_rd(rq->rd);
irq_work_queue_on(&rq->rd->rto_push_work, cpu);
+ }
}

/* Called from hardirq context */
@@ -2021,8 +2024,10 @@ void rto_push_irq_work_func(struct irq_work *work)

raw_spin_unlock(&rd->rto_lock);

- if (cpu < 0)
+ if (cpu < 0) {
+ sched_put_rd(rd);
return;
+ }

/* Try the next RT overloaded CPU */
irq_work_queue_on(&rd->rto_push_work, cpu);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 2e95505e23c6..fb5fc458547f 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -691,6 +691,8 @@ extern struct mutex sched_domains_mutex;
extern void init_defrootdomain(void);
extern int sched_init_domains(const struct cpumask *cpu_map);
extern void rq_attach_root(struct rq *rq, struct root_domain *rd);
+extern void sched_get_rd(struct root_domain *rd);
+extern void sched_put_rd(struct root_domain *rd);

#ifdef HAVE_RT_PUSH_IPI
extern void rto_push_irq_work_func(struct irq_work *work);
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 034cbed7f88b..519b024f4e94 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -259,6 +259,19 @@ void rq_attach_root(struct rq *rq, struct root_domain *rd)
call_rcu_sched(&old_rd->rcu, free_rootdomain);
}

+void sched_get_rd(struct root_domain *rd)
+{
+ atomic_inc(&rd->refcount);
+}
+
+void sched_put_rd(struct root_domain *rd)
+{
+ if (!atomic_dec_and_test(&rd->refcount))
+ return;
+
+ call_rcu_sched(&rd->rcu, free_rootdomain);
+}
+
static int init_rootdomain(struct root_domain *rd)
{
if (!zalloc_cpumask_var(&rd->span, GFP_KERNEL))
--
2.15.1