Re: [GIT PULL] RCU fixes for v3.0

From: Paul E. McKenney
Date: Wed Jul 20 2011 - 18:06:58 EST


On Wed, Jul 20, 2011 at 05:55:37PM -0400, Ed Tomlinson wrote:
> On Wednesday 20 July 2011 17:04:26 Ingo Molnar wrote:
> >
> > * Ingo Molnar <mingo@xxxxxxx> wrote:
> >
> > > Having put some testing into your rcu/urgent branch today, I'd feel
> > > more comfortable with taking this plus perhaps an RCU_BOOST
> > > disabling patch. That makes it all fundamentally tested by a number
> > > of people (including those who reported/reproduced the problems).
> > >
> > > Linus, would that approach be fine with you? I'll send an RFC pull
> > > request for the 6 patches as a reply to this mail, in a couple of
> > > minutes.
> >
> > here it is:
> >
> > Linus,
> >
> > Please pull the latest core-urgent-for-linus git tree from:
> >
> > git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git core-urgent-for-linus
>
> I've been running the previous version of this tree along with a fix for the first patch. An
> additional patch was added this morning to prevent an invalid warning from triggering. Aside
> from that warning, the system was stable with no other unexpected output in dmesg.
>
> Resetting to master and pulling the above tree gives me a clean boot. On at least one box that
> was seeing problems, this seems to be making things better (boost and threadirqs enabled).

Thank you for all the testing, Ed!!! As always, keeping my fingers crossed.

Thanx, Paul

> Thanks
> Ed
>
> > In addition to the testing Paul & co have performed, I have boot-tested
> > x86 defconfig/allno/allyes/allmod and a fair number of randconfigs,
> > and I have build-tested a dozen architecture defconfigs, among them the
> > key ones.
> >
> > ( We could also mark RCU_BOOST in init/Kconfig as EXPERIMENTAL, to
> > further de-risk it - but that change is not part of this pull
> > request. )
> >
> > Thanks,
> >
> > Ingo
> >
> > ------------------>
> > Paul E. McKenney (5):
> > rcu: decrease rcu_report_exp_rnp coupling with scheduler
> > rcu: Fix RCU_BOOST race handling current->rcu_read_unlock_special
> > rcu: Streamline code produced by __rcu_read_unlock()
> > rcu: protect __rcu_read_unlock() against scheduler-using irq handlers
> > signal: align __lock_task_sighand() irq disabling and RCU
> >
> > Peter Zijlstra (2):
> > sched: Add irq_{enter,exit}() to scheduler_ipi()
> > softirq,rcu: Inform RCU of irq_exit() activity
> >
> >
> > include/linux/sched.h | 3 ++
> > kernel/rcutree_plugin.h | 53 ++++++++++++++++++++++++++++++++++------------
> > kernel/sched.c | 44 +++++++++++++++++++++++++++++++++-----
> > kernel/signal.c | 19 +++++++++++-----
> > kernel/softirq.c | 12 ++++++++-
> > 5 files changed, 103 insertions(+), 28 deletions(-)
> >
> > diff --git a/include/linux/sched.h b/include/linux/sched.h
> > index 496770a..76676a4 100644
> > --- a/include/linux/sched.h
> > +++ b/include/linux/sched.h
> > @@ -1254,6 +1254,9 @@ struct task_struct {
> > #ifdef CONFIG_PREEMPT_RCU
> > int rcu_read_lock_nesting;
> > char rcu_read_unlock_special;
> > +#if defined(CONFIG_RCU_BOOST) && defined(CONFIG_TREE_PREEMPT_RCU)
> > + int rcu_boosted;
> > +#endif /* #if defined(CONFIG_RCU_BOOST) && defined(CONFIG_TREE_PREEMPT_RCU) */
> > struct list_head rcu_node_entry;
> > #endif /* #ifdef CONFIG_PREEMPT_RCU */
> > #ifdef CONFIG_TREE_PREEMPT_RCU
> > diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
> > index 75113cb..8aafbb8 100644
> > --- a/kernel/rcutree_plugin.h
> > +++ b/kernel/rcutree_plugin.h
> > @@ -68,6 +68,7 @@ struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state);
> > DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
> > static struct rcu_state *rcu_state = &rcu_preempt_state;
> >
> > +static void rcu_read_unlock_special(struct task_struct *t);
> > static int rcu_preempted_readers_exp(struct rcu_node *rnp);
> >
> > /*
> > @@ -147,7 +148,7 @@ static void rcu_preempt_note_context_switch(int cpu)
> > struct rcu_data *rdp;
> > struct rcu_node *rnp;
> >
> > - if (t->rcu_read_lock_nesting &&
> > + if (t->rcu_read_lock_nesting > 0 &&
> > (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
> >
> > /* Possibly blocking in an RCU read-side critical section. */
> > @@ -190,6 +191,14 @@ static void rcu_preempt_note_context_switch(int cpu)
> > rnp->gp_tasks = &t->rcu_node_entry;
> > }
> > raw_spin_unlock_irqrestore(&rnp->lock, flags);
> > + } else if (t->rcu_read_lock_nesting < 0 &&
> > + t->rcu_read_unlock_special) {
> > +
> > + /*
> > + * Complete exit from RCU read-side critical section on
> > + * behalf of preempted instance of __rcu_read_unlock().
> > + */
> > + rcu_read_unlock_special(t);
> > }
> >
> > /*
> > @@ -284,7 +293,7 @@ static struct list_head *rcu_next_node_entry(struct task_struct *t,
> > * notify RCU core processing or task having blocked during the RCU
> > * read-side critical section.
> > */
> > -static void rcu_read_unlock_special(struct task_struct *t)
> > +static noinline void rcu_read_unlock_special(struct task_struct *t)
> > {
> > int empty;
> > int empty_exp;
> > @@ -309,7 +318,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
> > }
> >
> > /* Hardware IRQ handlers cannot block. */
> > - if (in_irq()) {
> > + if (in_irq() || in_serving_softirq()) {
> > local_irq_restore(flags);
> > return;
> > }
> > @@ -342,6 +351,11 @@ static void rcu_read_unlock_special(struct task_struct *t)
> > #ifdef CONFIG_RCU_BOOST
> > if (&t->rcu_node_entry == rnp->boost_tasks)
> > rnp->boost_tasks = np;
> > + /* Snapshot and clear ->rcu_boosted with rcu_node lock held. */
> > + if (t->rcu_boosted) {
> > + special |= RCU_READ_UNLOCK_BOOSTED;
> > + t->rcu_boosted = 0;
> > + }
> > #endif /* #ifdef CONFIG_RCU_BOOST */
> > t->rcu_blocked_node = NULL;
> >
> > @@ -358,7 +372,6 @@ static void rcu_read_unlock_special(struct task_struct *t)
> > #ifdef CONFIG_RCU_BOOST
> > /* Unboost if we were boosted. */
> > if (special & RCU_READ_UNLOCK_BOOSTED) {
> > - t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BOOSTED;
> > rt_mutex_unlock(t->rcu_boost_mutex);
> > t->rcu_boost_mutex = NULL;
> > }
> > @@ -387,13 +400,22 @@ void __rcu_read_unlock(void)
> > struct task_struct *t = current;
> >
> > barrier(); /* needed if we ever invoke rcu_read_unlock in rcutree.c */
> > - --t->rcu_read_lock_nesting;
> > - barrier(); /* decrement before load of ->rcu_read_unlock_special */
> > - if (t->rcu_read_lock_nesting == 0 &&
> > - unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
> > - rcu_read_unlock_special(t);
> > + if (t->rcu_read_lock_nesting != 1)
> > + --t->rcu_read_lock_nesting;
> > + else {
> > + t->rcu_read_lock_nesting = INT_MIN;
> > + barrier(); /* assign before ->rcu_read_unlock_special load */
> > + if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
> > + rcu_read_unlock_special(t);
> > + barrier(); /* ->rcu_read_unlock_special load before assign */
> > + t->rcu_read_lock_nesting = 0;
> > + }
> > #ifdef CONFIG_PROVE_LOCKING
> > - WARN_ON_ONCE(ACCESS_ONCE(t->rcu_read_lock_nesting) < 0);
> > + {
> > + int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting);
> > +
> > + WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
> > + }
> > #endif /* #ifdef CONFIG_PROVE_LOCKING */
> > }
> > EXPORT_SYMBOL_GPL(__rcu_read_unlock);
> > @@ -589,7 +611,8 @@ static void rcu_preempt_check_callbacks(int cpu)
> > rcu_preempt_qs(cpu);
> > return;
> > }
> > - if (per_cpu(rcu_preempt_data, cpu).qs_pending)
> > + if (t->rcu_read_lock_nesting > 0 &&
> > + per_cpu(rcu_preempt_data, cpu).qs_pending)
> > t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
> > }
> >
> > @@ -695,9 +718,12 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
> >
> > raw_spin_lock_irqsave(&rnp->lock, flags);
> > for (;;) {
> > - if (!sync_rcu_preempt_exp_done(rnp))
> > + if (!sync_rcu_preempt_exp_done(rnp)) {
> > + raw_spin_unlock_irqrestore(&rnp->lock, flags);
> > break;
> > + }
> > if (rnp->parent == NULL) {
> > + raw_spin_unlock_irqrestore(&rnp->lock, flags);
> > wake_up(&sync_rcu_preempt_exp_wq);
> > break;
> > }
> > @@ -707,7 +733,6 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
> > raw_spin_lock(&rnp->lock); /* irqs already disabled */
> > rnp->expmask &= ~mask;
> > }
> > - raw_spin_unlock_irqrestore(&rnp->lock, flags);
> > }
> >
> > /*
> > @@ -1174,7 +1199,7 @@ static int rcu_boost(struct rcu_node *rnp)
> > t = container_of(tb, struct task_struct, rcu_node_entry);
> > rt_mutex_init_proxy_locked(&mtx, t);
> > t->rcu_boost_mutex = &mtx;
> > - t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED;
> > + t->rcu_boosted = 1;
> > raw_spin_unlock_irqrestore(&rnp->lock, flags);
> > rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */
> > rt_mutex_unlock(&mtx); /* Keep lockdep happy. */
> > diff --git a/kernel/sched.c b/kernel/sched.c
> > index 3dc716f..31e92ae 100644
> > --- a/kernel/sched.c
> > +++ b/kernel/sched.c
> > @@ -2544,13 +2544,9 @@ static int ttwu_remote(struct task_struct *p, int wake_flags)
> > }
> >
> > #ifdef CONFIG_SMP
> > -static void sched_ttwu_pending(void)
> > +static void sched_ttwu_do_pending(struct task_struct *list)
> > {
> > struct rq *rq = this_rq();
> > - struct task_struct *list = xchg(&rq->wake_list, NULL);
> > -
> > - if (!list)
> > - return;
> >
> > raw_spin_lock(&rq->lock);
> >
> > @@ -2563,9 +2559,45 @@ static void sched_ttwu_pending(void)
> > raw_spin_unlock(&rq->lock);
> > }
> >
> > +#ifdef CONFIG_HOTPLUG_CPU
> > +
> > +static void sched_ttwu_pending(void)
> > +{
> > + struct rq *rq = this_rq();
> > + struct task_struct *list = xchg(&rq->wake_list, NULL);
> > +
> > + if (!list)
> > + return;
> > +
> > + sched_ttwu_do_pending(list);
> > +}
> > +
> > +#endif /* CONFIG_HOTPLUG_CPU */
> > +
> > void scheduler_ipi(void)
> > {
> > - sched_ttwu_pending();
> > + struct rq *rq = this_rq();
> > + struct task_struct *list = xchg(&rq->wake_list, NULL);
> > +
> > + if (!list)
> > + return;
> > +
> > + /*
> > + * Not all reschedule IPI handlers call irq_enter/irq_exit, since
> > + * traditionally all their work was done from the interrupt return
> > + * path. Now that we actually do some work, we need to make sure
> > + * we do call them.
> > + *
> > + * Some archs already do call them, luckily irq_enter/exit nest
> > + * properly.
> > + *
> > + * Arguably we should visit all archs and update all handlers,
> > + * however a fair share of IPIs are still resched only so this would
> > + * somewhat pessimize the simple resched case.
> > + */
> > + irq_enter();
> > + sched_ttwu_do_pending(list);
> > + irq_exit();
> > }
> >
> > static void ttwu_queue_remote(struct task_struct *p, int cpu)
> > diff --git a/kernel/signal.c b/kernel/signal.c
> > index ff76786..415d85d 100644
> > --- a/kernel/signal.c
> > +++ b/kernel/signal.c
> > @@ -1178,18 +1178,25 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk,
> > {
> > struct sighand_struct *sighand;
> >
> > - rcu_read_lock();
> > for (;;) {
> > + local_irq_save(*flags);
> > + rcu_read_lock();
> > sighand = rcu_dereference(tsk->sighand);
> > - if (unlikely(sighand == NULL))
> > + if (unlikely(sighand == NULL)) {
> > + rcu_read_unlock();
> > + local_irq_restore(*flags);
> > break;
> > + }
> >
> > - spin_lock_irqsave(&sighand->siglock, *flags);
> > - if (likely(sighand == tsk->sighand))
> > + spin_lock(&sighand->siglock);
> > + if (likely(sighand == tsk->sighand)) {
> > + rcu_read_unlock();
> > break;
> > - spin_unlock_irqrestore(&sighand->siglock, *flags);
> > + }
> > + spin_unlock(&sighand->siglock);
> > + rcu_read_unlock();
> > + local_irq_restore(*flags);
> > }
> > - rcu_read_unlock();
> >
> > return sighand;
> > }
> > diff --git a/kernel/softirq.c b/kernel/softirq.c
> > index 40cf63d..fca82c3 100644
> > --- a/kernel/softirq.c
> > +++ b/kernel/softirq.c
> > @@ -315,16 +315,24 @@ static inline void invoke_softirq(void)
> > {
> > if (!force_irqthreads)
> > __do_softirq();
> > - else
> > + else {
> > + __local_bh_disable((unsigned long)__builtin_return_address(0),
> > + SOFTIRQ_OFFSET);
> > wakeup_softirqd();
> > + __local_bh_enable(SOFTIRQ_OFFSET);
> > + }
> > }
> > #else
> > static inline void invoke_softirq(void)
> > {
> > if (!force_irqthreads)
> > do_softirq();
> > - else
> > + else {
> > + __local_bh_disable((unsigned long)__builtin_return_address(0),
> > + SOFTIRQ_OFFSET);
> > wakeup_softirqd();
> > + __local_bh_enable(SOFTIRQ_OFFSET);
> > + }
> > }
> > #endif
> >
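
For reference, the RCU_BOOST change Ingo mentions in passing (marking it
EXPERIMENTAL in init/Kconfig) would amount to a small tweak along the lines
of the sketch below. This is an illustration only and not part of the pull
request; in particular, the "depends on" line is an assumption about the
v3.0-era entry rather than a copy of it.

	# Sketch only: gate RCU priority boosting behind CONFIG_EXPERIMENTAL.
	# The dependency list is illustrative, not copied from init/Kconfig.
	config RCU_BOOST
		bool "Enable RCU priority boosting (EXPERIMENTAL)"
		depends on RT_MUTEXES && PREEMPT_RCU && EXPERIMENTAL
		default n
		help
		  Boost the priority of preempted RCU readers that block
		  the current grace period for too long.  Say N if unsure.
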
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/