Re: BUG in 2.6.20-rt8

From: Paul E. McKenney
Date: Sun Feb 25 2007 - 20:25:41 EST


On Sun, Feb 25, 2007 at 07:27:47AM +0100, Ingo Molnar wrote:
>
> * Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx> wrote:
>
> > I got the following running stock 2.6.20-rt8 on a 4-CPU 1.8GHz
> > Opteron box. The machine continued to run a few rounds of kernbench
> > and LTP. Looks a bit scary -- a tasklet was "stolen" from
> > __tasklet_action().
> >
> > Thoughts? In the meantime, kicking it off again to see if it repeats.
>
> > BUG: at kernel/softirq.c:559 __tasklet_action()
>
> this seems to happen very sporadically, and it appears more likely
> on hyperthreaded CPUs. It is very likely caused by the
> redesign-tasklet-locking-to-be-sane patch below - which is a quick hack
> of mine from early -rt days. Can you see any obvious bug in it? The
> cmpxchg logic is certainly a bit ... tricky, locking-wise.

One thing thus far...

There is a repeated-reschedule case that looks like it can result
in a tasklet being dropped on the floor (a user-space replay of the
interleaving follows the steps below):

o __tasklet_action() runs out of "loops" (at which point,
we know TASKLET_STATE_SCHED is clear due to the prior test).

o Someone calls tasklet_schedule(), which sets TASKLET_STATE_SCHED
and calls __tasklet_schedule(), which fails in tasklet_trylock()
due to __tasklet_action() still having the TASKLET_STATE_RUN
bit set. So the tasklet is not queued.

o __tasklet_action() then forcibly unlocks, and the tasklet can
never run again, because its TASKLET_STATE_SCHED is already
set.
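
To make the interleaving concrete, here is a minimal user-space replay
of the three steps. The bit names mirror the patch; the "queued" flag
and the strictly sequential replay are a toy model, not kernel code:

#include <stdio.h>

#define SCHED	(1UL << 0)	/* TASKLET_STATE_SCHED */
#define RUN	(1UL << 1)	/* TASKLET_STATE_RUN */

int main(void)
{
	unsigned long state;	/* models t->state */
	int queued;		/* models "t is on some CPU's tasklet list" */

	/* Step 1: __tasklet_action() runs out of "loops" while still
	   holding RUN; SCHED is clear due to the prior test. */
	state = RUN;
	queued = 0;

	/* Step 2: tasklet_schedule() sets SCHED (it was clear, so
	   __tasklet_schedule() gets called)... */
	state |= SCHED;
	/* ...but the trylock fails because RUN is still set, so the
	   tasklet is NOT queued. */
	if (!(state & RUN))
		queued = 1;

	/* Step 3: __tasklet_action() forcibly unlocks. */
	state &= ~RUN;

	/* SCHED is set but the tasklet is on no list, and every later
	   tasklet_schedule() sees SCHED already set and returns early. */
	if ((state & SCHED) && !queued)
		printf("dropped: state=%#lx, on no list\n", state);
	return 0;
}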

But this scenario would have printed the "hm" message, which I didn't
see. And it would have been a low-probability race on top of that.

And I have not yet come up with a better way of handling this -- and
if I did, it would likely involve more rather than less cmpxchg. ;-)

If I can figure out a reasonable assertion, I will throw it at Promela
and see what happens.
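
As for the cmpxchg logic being "a bit ... tricky": the reason that
tasklet_tryunlock() has to be a cmpxchg rather than a plain clear_bit()
can be shown in a few lines of user-space C. This is just a sketch --
the bit names mirror the patch below, and gcc's
__sync_bool_compare_and_swap() stands in for the kernel's cmpxchg():

#include <stdio.h>

#define SCHED	(1UL << 0)	/* TASKLET_STATE_SCHED */
#define RUN	(1UL << 1)	/* TASKLET_STATE_RUN */

/* Stand-in for the patch's tasklet_tryunlock(): permit only the
   RUN -> 0 transition, failing if any other bit is set. */
static int tryunlock(unsigned long *state)
{
	return __sync_bool_compare_and_swap(state, RUN, 0UL);
}

int main(void)
{
	unsigned long state = RUN;

	/* Uncontended: RUN -> 0 succeeds and the tasklet is released. */
	printf("clean unlock:     %d\n", tryunlock(&state));

	/* Contended: another CPU set SCHED while t->func() was running.
	   The cmpxchg sees RUN|SCHED != RUN and fails, which is what
	   sends __tasklet_action() back to re-execute the function.  A
	   plain clear_bit(RUN) here would succeed blindly, and the
	   schedule would be lost, since its trylock had already failed
	   against RUN. */
	state = RUN | SCHED;
	printf("contended unlock: %d (state=%#lx)\n", tryunlock(&state), state);
	return 0;
}

Running it prints 1 then 0; the failing second attempt is exactly the
signal that the retry loop in __tasklet_action() keys off of.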

Thanx, Paul

> ---------------->
> From: Ingo Molnar <mingo@xxxxxxx>
>
> tasklet redesign: make it saner and make it easier to thread.
>
> Signed-off-by: Ingo Molnar <mingo@xxxxxxx>
>
> ----
> include/linux/interrupt.h | 39 ++++++-----
> kernel/softirq.c | 155 +++++++++++++++++++++++++++++++---------------
> 2 files changed, 128 insertions(+), 66 deletions(-)
>
> Index: linux/include/linux/interrupt.h
> ===================================================================
> --- linux.orig/include/linux/interrupt.h
> +++ linux/include/linux/interrupt.h
> @@ -276,8 +276,9 @@ extern void wait_for_softirq(int softirq
> to be executed on some cpu at least once after this.
> * If the tasklet is already scheduled, but its execution is still not
> started, it will be executed only once.
> - * If this tasklet is already running on another CPU (or schedule is called
> - from tasklet itself), it is rescheduled for later.
> + * If this tasklet is already running on another CPU, it is rescheduled
> + for later.
> + * Schedule must not be called from the tasklet itself (a lockup occurs)
> * Tasklet is strictly serialized wrt itself, but not
> wrt another tasklets. If client needs some intertask synchronization,
> he makes it with spinlocks.
> @@ -302,15 +303,25 @@ struct tasklet_struct name = { NULL, 0,
> enum
> {
> TASKLET_STATE_SCHED, /* Tasklet is scheduled for execution */
> - TASKLET_STATE_RUN /* Tasklet is running (SMP only) */
> + TASKLET_STATE_RUN, /* Tasklet is running (SMP only) */
> + TASKLET_STATE_PENDING /* Tasklet is pending */
> };
>
> -#ifdef CONFIG_SMP
> +#define TASKLET_STATEF_SCHED (1 << TASKLET_STATE_SCHED)
> +#define TASKLET_STATEF_RUN (1 << TASKLET_STATE_RUN)
> +#define TASKLET_STATEF_PENDING (1 << TASKLET_STATE_PENDING)
> +
> +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
> static inline int tasklet_trylock(struct tasklet_struct *t)
> {
> return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state);
> }
>
> +static inline int tasklet_tryunlock(struct tasklet_struct *t)
> +{
> + return cmpxchg(&t->state, TASKLET_STATEF_RUN, 0) == TASKLET_STATEF_RUN;
> +}
> +
> static inline void tasklet_unlock(struct tasklet_struct *t)
> {
> smp_mb__before_clear_bit();
> @@ -322,9 +333,10 @@ static inline void tasklet_unlock_wait(s
> while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); }
> }
> #else
> -#define tasklet_trylock(t) 1
> -#define tasklet_unlock_wait(t) do { } while (0)
> -#define tasklet_unlock(t) do { } while (0)
> +# define tasklet_trylock(t) 1
> +# define tasklet_tryunlock(t) 1
> +# define tasklet_unlock_wait(t) do { } while (0)
> +# define tasklet_unlock(t) do { } while (0)
> #endif
>
> extern void FASTCALL(__tasklet_schedule(struct tasklet_struct *t));
> @@ -357,17 +369,8 @@ static inline void tasklet_disable(struc
> smp_mb();
> }
>
> -static inline void tasklet_enable(struct tasklet_struct *t)
> -{
> - smp_mb__before_atomic_dec();
> - atomic_dec(&t->count);
> -}
> -
> -static inline void tasklet_hi_enable(struct tasklet_struct *t)
> -{
> - smp_mb__before_atomic_dec();
> - atomic_dec(&t->count);
> -}
> +extern fastcall void tasklet_enable(struct tasklet_struct *t);
> +extern fastcall void tasklet_hi_enable(struct tasklet_struct *t);
>
> extern void tasklet_kill(struct tasklet_struct *t);
> extern void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu);
> Index: linux/kernel/softirq.c
> ===================================================================
> --- linux.orig/kernel/softirq.c
> +++ linux/kernel/softirq.c
> @@ -464,14 +464,24 @@ struct tasklet_head
> static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec) = { NULL };
> static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec) = { NULL };
>
> +static inline void
> +__tasklet_common_schedule(struct tasklet_struct *t, struct tasklet_head *head, unsigned int nr)
> +{
> + if (tasklet_trylock(t)) {
> + WARN_ON(t->next != NULL);
> + t->next = head->list;
> + head->list = t;
> + raise_softirq_irqoff(nr);
> + tasklet_unlock(t);
> + }
> +}
> +
> void fastcall __tasklet_schedule(struct tasklet_struct *t)
> {
> unsigned long flags;
>
> local_irq_save(flags);
> - t->next = __get_cpu_var(tasklet_vec).list;
> - __get_cpu_var(tasklet_vec).list = t;
> - raise_softirq_irqoff(TASKLET_SOFTIRQ);
> + __tasklet_common_schedule(t, &__get_cpu_var(tasklet_vec), TASKLET_SOFTIRQ);
> local_irq_restore(flags);
> }
>
> @@ -482,81 +492,130 @@ void fastcall __tasklet_hi_schedule(stru
> unsigned long flags;
>
> local_irq_save(flags);
> - t->next = __get_cpu_var(tasklet_hi_vec).list;
> - __get_cpu_var(tasklet_hi_vec).list = t;
> - raise_softirq_irqoff(HI_SOFTIRQ);
> + __tasklet_common_schedule(t, &__get_cpu_var(tasklet_hi_vec), HI_SOFTIRQ);
> local_irq_restore(flags);
> }
>
> EXPORT_SYMBOL(__tasklet_hi_schedule);
>
> -static void tasklet_action(struct softirq_action *a)
> +void fastcall tasklet_enable(struct tasklet_struct *t)
> {
> - struct tasklet_struct *list;
> + if (!atomic_dec_and_test(&t->count))
> + return;
> + if (test_and_clear_bit(TASKLET_STATE_PENDING, &t->state))
> + tasklet_schedule(t);
> +}
>
> - local_irq_disable();
> - list = __get_cpu_var(tasklet_vec).list;
> - __get_cpu_var(tasklet_vec).list = NULL;
> - local_irq_enable();
> +EXPORT_SYMBOL(tasklet_enable);
> +
> +void fastcall tasklet_hi_enable(struct tasklet_struct *t)
> +{
> + if (!atomic_dec_and_test(&t->count))
> + return;
> + if (test_and_clear_bit(TASKLET_STATE_PENDING, &t->state))
> + tasklet_hi_schedule(t);
> +}
> +
> +EXPORT_SYMBOL(tasklet_hi_enable);
> +
> +static void
> +__tasklet_action(struct softirq_action *a, struct tasklet_struct *list)
> +{
> + int loops = 1000000;
>
> while (list) {
> struct tasklet_struct *t = list;
>
> list = list->next;
> + /*
> + * Should always succeed - after a tasklet got on the
> + * list (after getting the SCHED bit set from 0 to 1),
> + * nothing but the tasklet softirq it got queued to can
> + * lock it:
> + */
> + if (!tasklet_trylock(t)) {
> + WARN_ON(1);
> + continue;
> + }
> +
> + t->next = NULL;
> +
> + /*
> + * If we cannot handle the tasklet because it's disabled,
> + * mark it as pending. tasklet_enable() will later
> + * re-schedule the tasklet.
> + */
> + if (unlikely(atomic_read(&t->count))) {
> +out_disabled:
> + /* implicit unlock: */
> + wmb();
> + t->state = TASKLET_STATEF_PENDING;
> + continue;
> + }
>
> - if (tasklet_trylock(t)) {
> - if (!atomic_read(&t->count)) {
> - if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
> - BUG();
> - t->func(t->data);
> + /*
> + * From this point on, the tasklet might be rescheduled
> + * on another CPU, but it can only be added to another
> + * CPU's tasklet list if we unlock the tasklet (which we
> + * don't do yet).
> + */
> + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
> + WARN_ON(1);
> +
> +again:
> + t->func(t->data);
> +
> + /*
> + * Try to unlock the tasklet. We must use cmpxchg, because
> + * another CPU might have scheduled or disabled the tasklet.
> + * We only allow the STATE_RUN -> 0 transition here.
> + */
> + while (!tasklet_tryunlock(t)) {
> + /*
> + * If it got disabled meanwhile, bail out:
> + */
> + if (atomic_read(&t->count))
> + goto out_disabled;
> + /*
> + * If it got scheduled meanwhile, re-execute
> + * the tasklet function:
> + */
> + if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
> + goto again;
> + if (!--loops) {
> + printk("hm, tasklet state: %08lx\n", t->state);
> + WARN_ON(1);
> tasklet_unlock(t);
> - continue;
> + break;
> }
> - tasklet_unlock(t);
> }
> -
> - local_irq_disable();
> - t->next = __get_cpu_var(tasklet_vec).list;
> - __get_cpu_var(tasklet_vec).list = t;
> - __do_raise_softirq_irqoff(TASKLET_SOFTIRQ);
> - local_irq_enable();
> }
> }
>
> -static void tasklet_hi_action(struct softirq_action *a)
> +static void tasklet_action(struct softirq_action *a)
> {
> struct tasklet_struct *list;
>
> local_irq_disable();
> - list = __get_cpu_var(tasklet_hi_vec).list;
> - __get_cpu_var(tasklet_hi_vec).list = NULL;
> + list = __get_cpu_var(tasklet_vec).list;
> + __get_cpu_var(tasklet_vec).list = NULL;
> local_irq_enable();
>
> - while (list) {
> - struct tasklet_struct *t = list;
> + __tasklet_action(a, list);
> +}
>
> - list = list->next;
> +static void tasklet_hi_action(struct softirq_action *a)
> +{
> + struct tasklet_struct *list;
>
> - if (tasklet_trylock(t)) {
> - if (!atomic_read(&t->count)) {
> - if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
> - BUG();
> - t->func(t->data);
> - tasklet_unlock(t);
> - continue;
> - }
> - tasklet_unlock(t);
> - }
> + local_irq_disable();
> + list = __get_cpu_var(tasklet_hi_vec).list;
> + __get_cpu_var(tasklet_hi_vec).list = NULL;
> + local_irq_enable();
>
> - local_irq_disable();
> - t->next = __get_cpu_var(tasklet_hi_vec).list;
> - __get_cpu_var(tasklet_hi_vec).list = t;
> - __do_raise_softirq_irqoff(HI_SOFTIRQ);
> - local_irq_enable();
> - }
> + __tasklet_action(a, list);
> }
>
> -
> void tasklet_init(struct tasklet_struct *t,
> void (*func)(unsigned long), unsigned long data)
> {