Re: [GIT PULL] RCU fixes for v3.0

From: Ed Tomlinson
Date: Wed Jul 20 2011 - 17:55:54 EST


On Wednesday 20 July 2011 17:04:26 Ingo Molnar wrote:
>
> * Ingo Molnar <mingo@xxxxxxx> wrote:
>
> > Having put some testing into your rcu/urgent branch today i'd feel
> > more comfortable with taking this plus perhaps an RCU_BOOST
> > disabling patch. That makes it all fundamentally tested by a number
> > of people (including those who reported/reproduced the problems).
> >
> > Linus, would that approach be fine with you? I'll send an RFC pull
> > request for the 6 patches as a reply to this mail, in a couple of
> > minutes.
>
> here it is:
>
> Linus,
>
> Please pull the latest core-urgent-for-linus git tree from:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git core-urgent-for-linus

I've been running on the previous version of this tree along with a fix for the first patch. An
additional patch was added this morning to prevent an invalid warning from triggering. Aside
from that warning, the system was stable with no other unexpected output in dmesg.

Resetting to master and pulling the above tree gives me a clean boot. On at least one box that
was seeing problems, this seems to be making things better (boost and threadirqs enabled).

Thanks
Ed

> In addition to the testing Paul & co have performed, i have boot tested
> x86 defconfig/allno/allyes/allmod and a fair number of randconfigs,
> and i build tested a dozen architecture defconfigs, amongst them the
> key ones.
>
> ( We could also mark RCU_BOOST in init/Kconfig as EXPERIMENTAL, to
> further de-risk it - but that change is not part of this pull
> request. )
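
Fwiw, if that EXPERIMENTAL route does get taken later, I'd expect it to look roughly like the
below in init/Kconfig - the existing depends line is from memory here, so treat this as a
sketch rather than a patch:

config RCU_BOOST
	bool "Enable RCU priority boosting (EXPERIMENTAL)"
	depends on RT_MUTEXES && TREE_PREEMPT_RCU && EXPERIMENTAL
	default n
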
>
> Thanks,
>
> Ingo
>
> ------------------>
> Paul E. McKenney (5):
> rcu: decrease rcu_report_exp_rnp coupling with scheduler
> rcu: Fix RCU_BOOST race handling current->rcu_read_unlock_special
> rcu: Streamline code produced by __rcu_read_unlock()
> rcu: protect __rcu_read_unlock() against scheduler-using irq handlers
> signal: align __lock_task_sighand() irq disabling and RCU
>
> Peter Zijlstra (2):
> sched: Add irq_{enter,exit}() to scheduler_ipi()
> softirq,rcu: Inform RCU of irq_exit() activity
>
>
> include/linux/sched.h | 3 ++
> kernel/rcutree_plugin.h | 53 ++++++++++++++++++++++++++++++++++------------
> kernel/sched.c | 44 +++++++++++++++++++++++++++++++++-----
> kernel/signal.c | 19 +++++++++++-----
> kernel/softirq.c | 12 ++++++++-
> 5 files changed, 103 insertions(+), 28 deletions(-)
>
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 496770a..76676a4 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1254,6 +1254,9 @@ struct task_struct {
> #ifdef CONFIG_PREEMPT_RCU
> int rcu_read_lock_nesting;
> char rcu_read_unlock_special;
> +#if defined(CONFIG_RCU_BOOST) && defined(CONFIG_TREE_PREEMPT_RCU)
> + int rcu_boosted;
> +#endif /* #if defined(CONFIG_RCU_BOOST) && defined(CONFIG_TREE_PREEMPT_RCU) */
> struct list_head rcu_node_entry;
> #endif /* #ifdef CONFIG_PREEMPT_RCU */
> #ifdef CONFIG_TREE_PREEMPT_RCU
> diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
> index 75113cb..8aafbb8 100644
> --- a/kernel/rcutree_plugin.h
> +++ b/kernel/rcutree_plugin.h
> @@ -68,6 +68,7 @@ struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state);
> DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
> static struct rcu_state *rcu_state = &rcu_preempt_state;
>
> +static void rcu_read_unlock_special(struct task_struct *t);
> static int rcu_preempted_readers_exp(struct rcu_node *rnp);
>
> /*
> @@ -147,7 +148,7 @@ static void rcu_preempt_note_context_switch(int cpu)
> struct rcu_data *rdp;
> struct rcu_node *rnp;
>
> - if (t->rcu_read_lock_nesting &&
> + if (t->rcu_read_lock_nesting > 0 &&
> (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
>
> /* Possibly blocking in an RCU read-side critical section. */
> @@ -190,6 +191,14 @@ static void rcu_preempt_note_context_switch(int cpu)
> rnp->gp_tasks = &t->rcu_node_entry;
> }
> raw_spin_unlock_irqrestore(&rnp->lock, flags);
> + } else if (t->rcu_read_lock_nesting < 0 &&
> + t->rcu_read_unlock_special) {
> +
> + /*
> + * Complete exit from RCU read-side critical section on
> + * behalf of preempted instance of __rcu_read_unlock().
> + */
> + rcu_read_unlock_special(t);
> }
>
> /*
> @@ -284,7 +293,7 @@ static struct list_head *rcu_next_node_entry(struct task_struct *t,
> * notify RCU core processing or task having blocked during the RCU
> * read-side critical section.
> */
> -static void rcu_read_unlock_special(struct task_struct *t)
> +static noinline void rcu_read_unlock_special(struct task_struct *t)
> {
> int empty;
> int empty_exp;
> @@ -309,7 +318,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
> }
>
> /* Hardware IRQ handlers cannot block. */
> - if (in_irq()) {
> + if (in_irq() || in_serving_softirq()) {
> local_irq_restore(flags);
> return;
> }
> @@ -342,6 +351,11 @@ static void rcu_read_unlock_special(struct task_struct *t)
> #ifdef CONFIG_RCU_BOOST
> if (&t->rcu_node_entry == rnp->boost_tasks)
> rnp->boost_tasks = np;
> + /* Snapshot and clear ->rcu_boosted with rcu_node lock held. */
> + if (t->rcu_boosted) {
> + special |= RCU_READ_UNLOCK_BOOSTED;
> + t->rcu_boosted = 0;
> + }
> #endif /* #ifdef CONFIG_RCU_BOOST */
> t->rcu_blocked_node = NULL;
>
> @@ -358,7 +372,6 @@ static void rcu_read_unlock_special(struct task_struct *t)
> #ifdef CONFIG_RCU_BOOST
> /* Unboost if we were boosted. */
> if (special & RCU_READ_UNLOCK_BOOSTED) {
> - t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BOOSTED;
> rt_mutex_unlock(t->rcu_boost_mutex);
> t->rcu_boost_mutex = NULL;
> }
> @@ -387,13 +400,22 @@ void __rcu_read_unlock(void)
> struct task_struct *t = current;
>
> barrier(); /* needed if we ever invoke rcu_read_unlock in rcutree.c */
> - --t->rcu_read_lock_nesting;
> - barrier(); /* decrement before load of ->rcu_read_unlock_special */
> - if (t->rcu_read_lock_nesting == 0 &&
> - unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
> - rcu_read_unlock_special(t);
> + if (t->rcu_read_lock_nesting != 1)
> + --t->rcu_read_lock_nesting;
> + else {
> + t->rcu_read_lock_nesting = INT_MIN;
> + barrier(); /* assign before ->rcu_read_unlock_special load */
> + if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
> + rcu_read_unlock_special(t);
> + barrier(); /* ->rcu_read_unlock_special load before assign */
> + t->rcu_read_lock_nesting = 0;
> + }
> #ifdef CONFIG_PROVE_LOCKING
> - WARN_ON_ONCE(ACCESS_ONCE(t->rcu_read_lock_nesting) < 0);
> + {
> + int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting);
> +
> + WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
> + }
> #endif /* #ifdef CONFIG_PROVE_LOCKING */
> }
> EXPORT_SYMBOL_GPL(__rcu_read_unlock);
> @@ -589,7 +611,8 @@ static void rcu_preempt_check_callbacks(int cpu)
> rcu_preempt_qs(cpu);
> return;
> }
> - if (per_cpu(rcu_preempt_data, cpu).qs_pending)
> + if (t->rcu_read_lock_nesting > 0 &&
> + per_cpu(rcu_preempt_data, cpu).qs_pending)
> t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
> }
>
> @@ -695,9 +718,12 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
>
> raw_spin_lock_irqsave(&rnp->lock, flags);
> for (;;) {
> - if (!sync_rcu_preempt_exp_done(rnp))
> + if (!sync_rcu_preempt_exp_done(rnp)) {
> + raw_spin_unlock_irqrestore(&rnp->lock, flags);
> break;
> + }
> if (rnp->parent == NULL) {
> + raw_spin_unlock_irqrestore(&rnp->lock, flags);
> wake_up(&sync_rcu_preempt_exp_wq);
> break;
> }
> @@ -707,7 +733,6 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
> raw_spin_lock(&rnp->lock); /* irqs already disabled */
> rnp->expmask &= ~mask;
> }
> - raw_spin_unlock_irqrestore(&rnp->lock, flags);
> }
>
> /*
> @@ -1174,7 +1199,7 @@ static int rcu_boost(struct rcu_node *rnp)
> t = container_of(tb, struct task_struct, rcu_node_entry);
> rt_mutex_init_proxy_locked(&mtx, t);
> t->rcu_boost_mutex = &mtx;
> - t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED;
> + t->rcu_boosted = 1;
> raw_spin_unlock_irqrestore(&rnp->lock, flags);
> rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */
> rt_mutex_unlock(&mtx); /* Keep lockdep happy. */
> diff --git a/kernel/sched.c b/kernel/sched.c
> index 3dc716f..31e92ae 100644
> --- a/kernel/sched.c
> +++ b/kernel/sched.c
> @@ -2544,13 +2544,9 @@ static int ttwu_remote(struct task_struct *p, int wake_flags)
> }
>
> #ifdef CONFIG_SMP
> -static void sched_ttwu_pending(void)
> +static void sched_ttwu_do_pending(struct task_struct *list)
> {
> struct rq *rq = this_rq();
> - struct task_struct *list = xchg(&rq->wake_list, NULL);
> -
> - if (!list)
> - return;
>
> raw_spin_lock(&rq->lock);
>
> @@ -2563,9 +2559,45 @@ static void sched_ttwu_pending(void)
> raw_spin_unlock(&rq->lock);
> }
>
> +#ifdef CONFIG_HOTPLUG_CPU
> +
> +static void sched_ttwu_pending(void)
> +{
> + struct rq *rq = this_rq();
> + struct task_struct *list = xchg(&rq->wake_list, NULL);
> +
> + if (!list)
> + return;
> +
> + sched_ttwu_do_pending(list);
> +}
> +
> +#endif /* CONFIG_HOTPLUG_CPU */
> +
> void scheduler_ipi(void)
> {
> - sched_ttwu_pending();
> + struct rq *rq = this_rq();
> + struct task_struct *list = xchg(&rq->wake_list, NULL);
> +
> + if (!list)
> + return;
> +
> + /*
> + * Not all reschedule IPI handlers call irq_enter/irq_exit, since
> + * traditionally all their work was done from the interrupt return
> + * path. Now that we actually do some work, we need to make sure
> + * we do call them.
> + *
> + * Some archs already do call them, luckily irq_enter/exit nest
> + * properly.
> + *
> + * Arguably we should visit all archs and update all handlers,
> + * however a fair share of IPIs are still resched only so this would
> + * somewhat pessimize the simple resched case.
> + */
> + irq_enter();
> + sched_ttwu_do_pending(list);
> + irq_exit();
> }
>
> static void ttwu_queue_remote(struct task_struct *p, int cpu)
> diff --git a/kernel/signal.c b/kernel/signal.c
> index ff76786..415d85d 100644
> --- a/kernel/signal.c
> +++ b/kernel/signal.c
> @@ -1178,18 +1178,25 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk,
> {
> struct sighand_struct *sighand;
>
> - rcu_read_lock();
> for (;;) {
> + local_irq_save(*flags);
> + rcu_read_lock();
> sighand = rcu_dereference(tsk->sighand);
> - if (unlikely(sighand == NULL))
> + if (unlikely(sighand == NULL)) {
> + rcu_read_unlock();
> + local_irq_restore(*flags);
> break;
> + }
>
> - spin_lock_irqsave(&sighand->siglock, *flags);
> - if (likely(sighand == tsk->sighand))
> + spin_lock(&sighand->siglock);
> + if (likely(sighand == tsk->sighand)) {
> + rcu_read_unlock();
> break;
> - spin_unlock_irqrestore(&sighand->siglock, *flags);
> + }
> + spin_unlock(&sighand->siglock);
> + rcu_read_unlock();
> + local_irq_restore(*flags);
> }
> - rcu_read_unlock();
>
> return sighand;
> }
> diff --git a/kernel/softirq.c b/kernel/softirq.c
> index 40cf63d..fca82c3 100644
> --- a/kernel/softirq.c
> +++ b/kernel/softirq.c
> @@ -315,16 +315,24 @@ static inline void invoke_softirq(void)
> {
> if (!force_irqthreads)
> __do_softirq();
> - else
> + else {
> + __local_bh_disable((unsigned long)__builtin_return_address(0),
> + SOFTIRQ_OFFSET);
> wakeup_softirqd();
> + __local_bh_enable(SOFTIRQ_OFFSET);
> + }
> }
> #else
> static inline void invoke_softirq(void)
> {
> if (!force_irqthreads)
> do_softirq();
> - else
> + else {
> + __local_bh_disable((unsigned long)__builtin_return_address(0),
> + SOFTIRQ_OFFSET);
> wakeup_softirqd();
> + __local_bh_enable(SOFTIRQ_OFFSET);
> + }
> }
> #endif
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/