Re: RFC [patch] sched,cgroup_sched: convince RT_GROUP_SCHED throttle to work

From: Mike Galbraith
Date: Sun Apr 15 2012 - 00:51:48 EST


On Sun, 2012-04-15 at 05:44 +0200, Mike Galbraith wrote:
> On Sun, 2012-04-15 at 05:37 +0200, Mike Galbraith wrote:
> > On Sat, 2012-04-14 at 13:10 +0200, Peter Zijlstra wrote:
> > > On Tue, 2012-04-10 at 11:08 +0200, Mike Galbraith wrote:
> > >
> > > > --- a/kernel/sched/rt.c
> > > > +++ b/kernel/sched/rt.c
> > > > @@ -782,6 +782,19 @@ static int do_sched_rt_period_timer(stru
> > > > const struct cpumask *span;
> > > >
> > > > span = sched_rt_period_mask();
> > > > +#ifdef CONFIG_RT_GROUP_SCHED
> > > > + /*
> > > > + * FIXME: isolated CPUs should really leave the root task group,
> > >
> > > No no, that's the wrong fix, the right fix is to remove isolcpus :-)
> >
> > Yeah, isolcpus needs to die, but...
> >
> > > I guess the alternative 'fix' is to not account the rt_runtime on
> > > isolated cpus.. does something like the below actually work?
> >
> > I haven't tried it, because the exact same thing happens when you
> > isolate via cpusets directly below root. One timer, two (or more)
> > rd->span, so _somebody_ is screwed.
>
> You _could_ bail on !rq->sd I suppose, but the way I hacked around it,
> the user can keep the throttle for testing/troubleshooting their
> isolated setup, and turn it off in production. OTOH, auto throttle
> disable for all isolated sets could work just as well.

Like so seems to work. I personally like the 2-line version better, but
whatever solves this dinky but deadly problem works for me.

---
kernel/sched/core.c | 7 ++++++-
kernel/sched/rt.c | 9 +++++++++
kernel/sched/sched.h | 3 +++
3 files changed, 18 insertions(+), 1 deletion(-)

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5875,6 +5875,11 @@ cpu_attach_domain(struct sched_domain *s
sd->child = NULL;
}

+ if (sd)
+ cpumask_clear_cpu(cpu, cpu_isolated_map);
+ else
+ cpumask_set_cpu(cpu, cpu_isolated_map);
+
sched_domain_debug(sd, cpu);

rq_attach_root(rq, rd);
@@ -5886,7 +5891,7 @@ cpu_attach_domain(struct sched_domain *s
}

/* cpus with isolated domains */
-static cpumask_var_t cpu_isolated_map;
+cpumask_var_t cpu_isolated_map;

/* Setup the mask of cpus configured for isolated domains */
static int __init isolated_cpu_setup(char *str)
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -922,6 +922,9 @@ static void update_curr_rt(struct rq *rq
if (!rt_bandwidth_enabled())
return;

+ if (cpumask_test_cpu(cpu_of(rq), cpu_isolated_map))
+ return;
+
for_each_sched_rt_entity(rt_se) {
rt_rq = rt_rq_of_se(rt_se);

@@ -1014,6 +1017,9 @@ static inline void dec_rt_prio(struct rt
static void
inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
+ if (cpumask_test_cpu(rq_of_rt_rq(rt_rq)->cpu, cpu_isolated_map))
+ return;
+
if (rt_se_boosted(rt_se))
rt_rq->rt_nr_boosted++;

@@ -1035,6 +1041,9 @@ dec_rt_group(struct sched_rt_entity *rt_
static void
inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
+ if (cpumask_test_cpu(rq_of_rt_rq(rt_rq)->cpu, cpu_isolated_map))
+ return;
+
start_rt_bandwidth(&def_rt_bandwidth);
}

--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -73,6 +73,9 @@ struct rt_bandwidth {

extern struct mutex sched_domains_mutex;

+/* cpus with isolated domains */
+extern cpumask_var_t cpu_isolated_map;
+
#ifdef CONFIG_CGROUP_SCHED

#include <linux/cgroup.h>


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/