Re: [RFC][PATCH 6/7] printk: use alternative printk buffers

From: Petr Mladek
Date: Thu Sep 29 2016 - 09:00:11 EST


On Tue 2016-09-27 23:22:36, Sergey Senozhatsky wrote:
> Use alt_printk buffer in in printk recursion-prone blocks:
> -- around logbuf_lock protected sections in vprintk_emit() and
> console_unlock()
> -- around down_trylock_console_sem() and up_console_sem()
>
> Note that it addresses deadlocks caused by recursiove printk()
> calls only.
>
> Examples:
>
> 1) printk() from logbuf_lock spin_lock section
>
> Assume the following code:
> printk()
> raw_spin_lock(&logbuf_lock);
> WARN_ON(1);
> raw_spin_unlock(&logbuf_lock);
>
> which now produces:
>
> ------------[ cut here ]------------
> WARNING: CPU: 0 PID: 366 at kernel/printk/printk.c:1811 vprintk_emit+0x1cd/0x438
> CPU: 0 PID: 366 Comm: bash
> Call Trace:
> [<ffffffff811e24d9>] dump_stack+0x68/0x92
> [<ffffffff810455cd>] __warn+0xc2/0xdd
> [<ffffffff81045658>] warn_slowpath_null+0x1d/0x1f
> [<ffffffff81091527>] vprintk_emit+0x1cd/0x438
> [<ffffffff810918fe>] vprintk_default+0x1d/0x1f
> [<ffffffff810fdf6a>] printk+0x48/0x50
> [..]
> [<ffffffff813caaaa>] entry_SYSCALL_64_fastpath+0x18/0xad
> ---[ end trace ]---
>
> 2) printk() from semaphore sem->lock spin_lock section
>
> Assume the following code
>
> printk()
> console_trylock()
> down_trylock()
> raw_spin_lock_irqsave(&sem->lock, flags);
> WARN_ON(1);
> raw_spin_unlock_irqrestore(&sem->lock, flags);
>
> which now produces:
>
> ------------[ cut here ]------------
> WARNING: CPU: 1 PID: 363 at kernel/locking/semaphore.c:141 down_trylock+0x3d/0x62
> CPU: 1 PID: 363 Comm: bash
> Call Trace:
> [<ffffffff811e24e0>] dump_stack+0x68/0x92
> [<ffffffff810455cd>] __warn+0xc2/0xdd
> [<ffffffff81045658>] warn_slowpath_null+0x1d/0x1f
> [<ffffffff810838df>] down_trylock+0x3d/0x62
> [<ffffffff8109177e>] ? vprintk_emit+0x3f9/0x414
> [<ffffffff810905cb>] console_trylock+0x31/0xeb
> [<ffffffff8109177e>] vprintk_emit+0x3f9/0x414
> [<ffffffff81091905>] vprintk_default+0x1d/0x1f
> [<ffffffff810fdf71>] printk+0x48/0x50
> [..]
> [<ffffffff813caaaa>] entry_SYSCALL_64_fastpath+0x18/0xad
> ---[ end trace ]---
>
> 3) printk() from console_unlock()
>
> Assume the following code:
>
> printk()
> console_unlock()
> raw_spin_lock(&logbuf_lock);
> WARN_ON(1);
> raw_spin_unlock(&logbuf_lock);
>
> which now produces:
>
> ------------[ cut here ]------------
> WARNING: CPU: 1 PID: 329 at kernel/printk/printk.c:2384 console_unlock+0x12d/0x559
> CPU: 1 PID: 329 Comm: bash
> Call Trace:
> [<ffffffff811f2380>] dump_stack+0x68/0x92
> [<ffffffff8103d649>] __warn+0xb8/0xd3
> [<ffffffff8103d6ca>] warn_slowpath_null+0x18/0x1a
> [<ffffffff8107f917>] console_unlock+0x12d/0x559
> [<ffffffff810772c4>] ? trace_hardirqs_on_caller+0x16d/0x189
> [<ffffffff810772ed>] ? trace_hardirqs_on+0xd/0xf
> [<ffffffff810800a6>] vprintk_emit+0x363/0x374
> [<ffffffff81080219>] vprintk_default+0x18/0x1a
> [<ffffffff810c7f77>] printk+0x43/0x4b
> [..]
> [<ffffffff814c2d6a>] entry_SYSCALL_64_fastpath+0x18/0xad
> ---[ end trace ]---
>
> 4) printk() from try_to_wake_up()
>
> Assume the following code:
>
> printk()
> console_unlock()
> up()
> try_to_wake_up()
> raw_spin_lock_irqsave(&p->pi_lock, flags);
> WARN_ON(1);
> raw_spin_unlock_irqrestore(&p->pi_lock, flags);
>
> which now produces:
>
> ------------[ cut here ]------------
> WARNING: CPU: 3 PID: 363 at kernel/sched/core.c:2028 try_to_wake_up+0x7f/0x4f7
> CPU: 3 PID: 363 Comm: bash
> Call Trace:
> [<ffffffff811e2557>] dump_stack+0x68/0x92
> [<ffffffff810455cd>] __warn+0xc2/0xdd
> [<ffffffff81045658>] warn_slowpath_null+0x1d/0x1f
> [<ffffffff8106c8d0>] try_to_wake_up+0x7f/0x4f7
> [<ffffffff8106cd5d>] wake_up_process+0x15/0x17
> [<ffffffff813c82c6>] __up.isra.0+0x56/0x63
> [<ffffffff810839a3>] up+0x32/0x42
> [<ffffffff8108f624>] __up_console_sem+0x37/0x55
> [<ffffffff810910ff>] console_unlock+0x21e/0x4c2
> [<ffffffff810917bf>] vprintk_emit+0x41c/0x462
> [<ffffffff81091971>] vprintk_default+0x1d/0x1f
> [<ffffffff810fdfdd>] printk+0x48/0x50
> [..]
> [<ffffffff813cab2a>] entry_SYSCALL_64_fastpath+0x18/0xad
> ---[ end trace ]---
>
> another example of WARN_ON() from scheduler code:
>
> ------------[ cut here ]------------
> WARNING: CPU: 1 PID: 361 at kernel/sched/core.c:2026 try_to_wake_up+0x6c/0x4e4
> CPU: 1 PID: 361 Comm: bash
> Call Trace:
> <IRQ> [<ffffffff811e2544>] dump_stack+0x68/0x92
> [<ffffffff810455cd>] __warn+0xc2/0xdd
> [<ffffffff81045658>] warn_slowpath_null+0x1d/0x1f
> [<ffffffff8106c8bd>] try_to_wake_up+0x6c/0x4e4
> [<ffffffff81089760>] ? lock_acquire+0x130/0x1c4
> [<ffffffff8106cd4a>] wake_up_process+0x15/0x17
> [<ffffffff8105a696>] wake_up_worker+0x28/0x2a
> [<ffffffff8105a869>] insert_work+0xa5/0xb1
> [<ffffffff8105bb99>] __queue_work+0x374/0x509
> [<ffffffff8105bd68>] queue_work_on+0x3a/0x64
> [<ffffffff812d7fd1>] dbs_irq_work+0x24/0x27
> [<ffffffff810ebbe7>] irq_work_run_list+0x43/0x67
> [<ffffffff810ebe26>] irq_work_run+0x2a/0x44
> [<ffffffff81019196>] smp_irq_work_interrupt+0x2e/0x35
> [<ffffffff813cccf9>] irq_work_interrupt+0x89/0x90
> [<ffffffff813cd1fe>] ? __do_softirq+0x96/0x434
> [<ffffffff813cd207>] ? __do_softirq+0x9f/0x434
> [<ffffffff8104b6e4>] irq_exit+0x40/0x90
> [<ffffffff813ccee9>] smp_apic_timer_interrupt+0x42/0x4d
> [<ffffffff813cc2d9>] apic_timer_interrupt+0x89/0x90
> <EOI> [<ffffffff8101c578>] ? native_sched_clock+0x7/0xbc
> [<ffffffff81072db7>] ? sched_clock_cpu+0x17/0xc6
> [<ffffffff81072fef>] running_clock+0x10/0x12
> [<ffffffff810917d8>] vprintk_emit+0x448/0x462
> [<ffffffff8109195e>] vprintk_default+0x1d/0x1f
> [<ffffffff810fdfca>] printk+0x48/0x50
> [..]
> [<ffffffff813cab2a>] entry_SYSCALL_64_fastpath+0x18/0xad
> ---[ end trace ]---
>
> Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@xxxxxxxxx>
> ---
> kernel/printk/printk.c | 27 ++++++++++++++++++++-------
> 1 file changed, 20 insertions(+), 7 deletions(-)
>
> diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
> index 2afa16b..e5dacfb 100644
> --- a/kernel/printk/printk.c
> +++ b/kernel/printk/printk.c
> @@ -1791,7 +1791,7 @@ asmlinkage int vprintk_emit(int facility, int level,
> zap_locks();
> }
>
> - lockdep_off();
> + alt_printk_enter();

IMHO, we could not longer enter vprintk_emit() recursively. The same
section that was guarded by logbuf_cpu is guarded by
alt_printk_enter()/exit() now.

IMHO, we could remove all the logic around the recursion. Then we
could even disable/enable irqs inside alt_printk_enter()/exit().

And to correct myself from the previous mail. It is enough to disable
IRQs. It is enough to make sure that we will not preempt and will
stay on the same CPU.

> /* This stops the holder of console_sem just where we want him */
> raw_spin_lock(&logbuf_lock);
> logbuf_cpu = this_cpu;
> @@ -1900,12 +1900,11 @@ asmlinkage int vprintk_emit(int facility, int level,
>
> logbuf_cpu = UINT_MAX;
> raw_spin_unlock(&logbuf_lock);
> - lockdep_on();
> + alt_printk_exit();
> local_irq_restore(flags);
>
> /* If called from the scheduler, we can not call up(). */
> if (!in_sched) {
> - lockdep_off();
> /*
> * Try to acquire and then immediately release the console
> * semaphore. The release will print out buffers and wake up
> @@ -1913,7 +1912,6 @@ asmlinkage int vprintk_emit(int facility, int level,
> */
> if (console_trylock())
> console_unlock();
> - lockdep_on();
> }
>
> return printed_len;
> @@ -2239,8 +2237,18 @@ EXPORT_SYMBOL(console_lock);
> */
> int console_trylock(void)
> {
> - if (down_trylock_console_sem())
> + unsigned long flags;
> + int lock_failed;
> +
> + local_irq_save(flags);
> + alt_printk_enter();
> + lock_failed = down_trylock_console_sem();
> + alt_printk_exit();
> + local_irq_restore(flags);
> +
> + if (lock_failed)
> return 0;
> +
> if (console_suspended) {
> up_console_sem();
> return 0;
> @@ -2395,7 +2403,9 @@ void console_unlock(void)
> size_t len;
> int level;
>
> - raw_spin_lock_irqsave(&logbuf_lock, flags);
> + local_irq_save(flags);
> + alt_printk_enter();
> + raw_spin_lock(&logbuf_lock);
> if (seen_seq != log_next_seq) {
> wake_klogd = true;
> seen_seq = log_next_seq;
> @@ -2456,6 +2466,7 @@ void console_unlock(void)
> stop_critical_timings(); /* don't trace print latency */
> call_console_drivers(level, ext_text, ext_len, text, len);
> start_critical_timings();
> + alt_printk_exit();
> local_irq_restore(flags);
>
> if (do_cond_resched)
> @@ -2479,7 +2490,9 @@ void console_unlock(void)
> */
> raw_spin_lock(&logbuf_lock);
> retry = console_seq != log_next_seq;
> - raw_spin_unlock_irqrestore(&logbuf_lock, flags);
> + raw_spin_unlock(&logbuf_lock);
> + alt_printk_exit();
> + local_irq_restore(flags);

We should mention that this patch makes an obsolete artefact from
printk_deferred(). It opens the door for another big cleanup and
relief.

Best Regards,
Petr