Re: [RFC][PATCH 3/3] x86/entry, ORC: Teach objtool/unwind_orc about stack irq swizzles

From: Josh Poimboeuf
Date: Thu May 07 2020 - 14:30:58 EST


On Thu, May 07, 2020 at 07:38:09PM +0200, Peter Zijlstra wrote:
> On Thu, May 07, 2020 at 06:10:23PM +0200, Peter Zijlstra wrote:
> > Thomas would very much like objtool to understand and generate correct
> > ORC unwind information for the minimal stack swizzle sequence:
> >
> > mov %rsp, (%[ts])
> > mov %[ts], %rsp
> > ...
> > pop %rsp
> >
> > This sequence works for the fp and guess unwinders -- all they need is
> > that top-of-stack link set up by the first instruction.
> >
> > The previous entry_64.S code worked with "UNWIND_HINT_REGS indirect=1"
> > hints to inform the unwinder about the stack-swizzle, but because
> > we've now already entered C, we can no longer point to a REGS. In
> > fact, due to being in C we don't even have a reliable sp_offset to
> > anything.
> >
> > None of the existing UNWIND_HINT() functionality is quite sufficient
> > to generate the right thing, but SP_INDIRECT is still the closest, so
> > extend it.
> >
> > When SP_INDIRECT is combined with .end=1 (which is otherwise unused,
> > except for sp_reg == UNDEFINED):
> >
> > - change it from (sp+sp_offset) to (sp)+sp_offset
> > - have objtool preserve sp_offset from the previous state
> > - change "pop %rsp" handling to restore the CFI state from before the
> > hint.
> >
> > NOTES:
> >
> > - We now have an instruction with stackops and a hint; make hint take
> > precedence over stackops.
> >
> > - Due to the reverse search in "pop %rsp" we must
> > fill_alternative_cfi() before validate_branch().
> >
> > - This all isn't really pretty, but it works and gets Thomas the code
> > sequence he wants.
> >
> > Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
> > ---
>
> Much simpler, also works.

Doing the stack switch in inline asm is just nasty.

Also, a frame pointer makes this SO much easier for ORC/objtool, no
funky hints needed. In fact, maybe we can get rid of the indirect hint
things altogether, which means even more deleted code.

This is much cleaner, and it boots:

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 3f9b2555e6fb..4a25f72f969f 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -718,15 +718,6 @@ __visible void __xen_pv_evtchn_do_upcall(void)
irq_exit_rcu();
}

-/*
- * Separate function as objtool is unhappy about having
- * the macro at the call site.
- */
-static noinstr void run_on_irqstack(void)
-{
- RUN_ON_IRQSTACK(__xen_pv_evtchn_do_upcall);
-}
-
__visible noinstr void xen_pv_evtchn_do_upcall(struct pt_regs *regs)
{
struct pt_regs *old_regs;
@@ -739,7 +730,7 @@ __visible noinstr void xen_pv_evtchn_do_upcall(struct pt_regs *regs)
__xen_pv_evtchn_do_upcall();
instr_end();
} else {
- run_on_irqstack();
+ RUN_ON_IRQSTACK(__xen_pv_evtchn_do_upcall);
}

set_irq_regs(old_regs);
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 3046dfc69b8c..d036dc831a23 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1295,3 +1295,31 @@ SYM_CODE_START(rewind_stack_do_exit)
call do_exit
SYM_CODE_END(rewind_stack_do_exit)
.popsection
+
+/*
+ * rdi: new stack pointer
+ * rsi: function pointer
+ * rdx: arg1 (can be NULL if none)
+ */
+SYM_FUNC_START(call_on_stack)
+ /*
+ * Save the frame pointer unconditionally. This allows the ORC
+ * unwinder to handle the stack switch.
+ */
+ pushq %rbp
+ mov %rsp, %rbp
+
+ /*
+ * The unwinder relies on the word at the end of the new stack page
+ * linking back to the previous RSP.
+ */
+ mov %rsp, -8(%rdi)
+ lea -8(%rdi), %rsp
+ mov %rdx, %rdi
+ CALL_NOSPEC rsi
+
+ /* Restore the previous stack pointer from RBP. */
+ leaveq
+
+ ret
+SYM_FUNC_END(call_on_stack)
diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h
index f307d4c27f84..131a6c689b1c 100644
--- a/arch/x86/include/asm/irq_stack.h
+++ b/arch/x86/include/asm/irq_stack.h
@@ -7,42 +7,26 @@
#include <asm/processor.h>

#ifdef CONFIG_X86_64
+
+void call_on_stack(void *stack, void *func, void *arg);
+
static __always_inline bool irqstack_active(void)
{
return __this_cpu_read(irq_count) != -1;
}

-#define __RUN_ON_IRQSTACK(_asm, ...) \
+#define __RUN_ON_IRQSTACK(func, arg) \
do { \
- unsigned long tos; \
- \
lockdep_assert_irqs_disabled(); \
- \
- tos = ((unsigned long)__this_cpu_read(hardirq_stack_ptr)) - 8; \
- \
- __this_cpu_add(irq_count, 1); \
- asm volatile( \
- "movq %%rsp, (%[ts]) \n" \
- "movq %[ts], %%rsp \n" \
- ASM_INSTR_BEGIN \
- _asm " \n" \
- ASM_INSTR_END \
- "popq %%rsp \n" \
- : \
- : [ts] "r" (tos), ##__VA_ARGS__ \
- : "memory" \
- ); \
+ call_on_stack(__this_cpu_read(hardirq_stack_ptr), func, arg); \
__this_cpu_sub(irq_count, 1); \
} while (0)

-/*
- * Macro to emit code for running @func on the irq stack.
- */
#define RUN_ON_IRQSTACK(func) \
- __RUN_ON_IRQSTACK("call" __ASM_FORM(func))
+ __RUN_ON_IRQSTACK(func, NULL)

#define RUN_ON_IRQSTACK_ARG1(func, arg) \
- __RUN_ON_IRQSTACK("call" __ASM_FORM(func), "D" (arg))
+ __RUN_ON_IRQSTACK(func, arg)

#else /* CONFIG_X86_64 */
static __always_inline bool irqstack_active(void) { return false; }
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index c41b0a2859d7..30b6ddf64dc7 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -74,7 +74,7 @@ int irq_init_percpu_irqstack(unsigned int cpu)

static noinstr void handle_irq_on_irqstack(struct irq_desc *desc)
{
- __RUN_ON_IRQSTACK(CALL_NOSPEC, THUNK_TARGET(desc->handle_irq), "D" (desc));
+ RUN_ON_IRQSTACK_ARG1(desc->handle_irq, desc);
}

void handle_irq(struct irq_desc *desc, struct pt_regs *regs)