Re: smp_call_function_single lockups

From: Linus Torvalds
Date: Mon Feb 23 2015 - 14:32:57 EST


On Mon, Feb 23, 2015 at 6:01 AM, Rafael David Tinoco <inaddy@xxxxxxxxxx> wrote:
>
> This is v3.19 + your patch (smp acquire/release)
> - (nested kvm with 2 vcpus on top of proliant with x2apic cluster mode
> and acpi_idle)

Hmm. There is absolutely nothing else going on on that machine, except
for the single call to smp_call_function_single() that is waiting for
the CSD to be released.

> * It looks like we got locked because of reentrant flush_tlb_* through
> smp_call_*
> but I'll leave it to you.

No, that is all a perfectly regular callchain:

.. native_flush_tlb_others -> smp_call_function_many ->
smp_call_function_single

but the stack contains some stale addresses (one is probably just from
smp_call_function_single() calling into "generic_exec_single()", and
thus the stack contains the return address inside
smp_call_function_single() in _addition_ to the actual place where the
watchdog timer then interrupted it).

It all really looks very regular and sane, and looks like
smp_call_function_single() is happily just waiting for the IPI to
finish in the (inlined) csd_lock_wait().

I see nothing wrong at all.

However, here's a patch to the actual x86
smp_call_function_single_interrupt() handler, which probably doesn't
make any difference at all, but does:

- gets rid of the incredibly crazy and ugly smp_entering_irq() that
inexplicably pairs with irq_exit()

- makes the SMP IPI functions dot he APIC ACK cycle at the *end*
rather than at the beginning.

The exact placement of the ACK should make absolutely no difference,
since interrupts should be disabled for the whole period anyway. But
I'm desperate and am flailing around worrying about x2apic bugs and
whatnot in addition to whatever bugs we migth have in this area in sw.

So maybe there is some timing thing. This makes us consistently ack
the IPI _after_ we have cleared the list of smp call actions, and
executed the list, rather than ack it before, and then go on to muck
with the list.

Plus that old code really annoyed me anyway.

So I don't see why this should matter, but since I don't see how the
bug could happen in the first place, might as well try it..

Linus
arch/x86/kernel/smp.c | 35 +++++++++++++++++++++++------------
1 file changed, 23 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index be8e1bde07aa..ed43259111d8 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -265,12 +265,23 @@ __visible void smp_reschedule_interrupt(struct pt_regs *regs)
*/
}

-static inline void smp_entering_irq(void)
+/*
+ * This is the same as "entering_irq()", except it doesn't
+ * do the 'exit_idle()'.
+ *
+ * It pairs with smp_irq_exit().
+ */
+static inline void smp_irq_enter(void)
{
- ack_APIC_irq();
irq_enter();
}

+static inline void smp_irq_exit(void)
+{
+ irq_exit();
+ ack_APIC_irq();
+}
+
__visible void smp_trace_reschedule_interrupt(struct pt_regs *regs)
{
/*
@@ -279,11 +290,11 @@ __visible void smp_trace_reschedule_interrupt(struct pt_regs *regs)
* scheduler_ipi(). This is OK, since those functions are allowed
* to nest.
*/
- smp_entering_irq();
+ smp_irq_enter();
trace_reschedule_entry(RESCHEDULE_VECTOR);
__smp_reschedule_interrupt();
trace_reschedule_exit(RESCHEDULE_VECTOR);
- exiting_irq();
+ smp_irq_exit();
/*
* KVM uses this interrupt to force a cpu out of guest mode
*/
@@ -297,18 +308,18 @@ static inline void __smp_call_function_interrupt(void)

__visible void smp_call_function_interrupt(struct pt_regs *regs)
{
- smp_entering_irq();
+ smp_irq_enter();
__smp_call_function_interrupt();
- exiting_irq();
+ smp_irq_exit();
}

__visible void smp_trace_call_function_interrupt(struct pt_regs *regs)
{
- smp_entering_irq();
+ smp_irq_enter();
trace_call_function_entry(CALL_FUNCTION_VECTOR);
__smp_call_function_interrupt();
trace_call_function_exit(CALL_FUNCTION_VECTOR);
- exiting_irq();
+ smp_irq_exit();
}

static inline void __smp_call_function_single_interrupt(void)
@@ -319,18 +330,18 @@ static inline void __smp_call_function_single_interrupt(void)

__visible void smp_call_function_single_interrupt(struct pt_regs *regs)
{
- smp_entering_irq();
+ smp_irq_enter();
__smp_call_function_single_interrupt();
- exiting_irq();
+ smp_irq_exit();
}

__visible void smp_trace_call_function_single_interrupt(struct pt_regs *regs)
{
- smp_entering_irq();
+ smp_irq_enter();
trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR);
__smp_call_function_single_interrupt();
trace_call_function_single_exit(CALL_FUNCTION_SINGLE_VECTOR);
- exiting_irq();
+ smp_irq_exit();
}

static int __init nonmi_ipi_setup(char *str)