[PATCH 8/8] x86/retpoline: Fix retpoline unwind

From: Peter Zijlstra
Date: Thu Apr 23 2020 - 08:52:15 EST


Currently objtool cannot understand retpolines, and thus cannot
generate ORC unwind information for them. This means that we cannot
unwind from the middle of a retpoline.

The recent ANNOTATE_INTRA_FUNCTION_CALL and UNWIND_HINT_RET_OFFSET
support in objtool enables it to understand the basic retpoline
construct. A further problem is that the ORC unwind information is
alternative-invariant; IOW, every alternative should have the same
ORC data, and retpolines obviously violate this. This means we need
to out-of-line them.

Since all GCC-generated code already uses out-of-line retpolines, this
should not affect performance much, if at all.

This will enable objtool to generate valid ORC data for the
out-of-line copies, which means we can correctly and reliably unwind
through a retpoline.

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
arch/x86/include/asm/asm-prototypes.h | 7 +++
arch/x86/include/asm/nospec-branch.h | 75 ++++++++--------------------------
arch/x86/lib/retpoline.S | 26 ++++++++++-
3 files changed, 49 insertions(+), 59 deletions(-)

--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -21,8 +21,15 @@ extern void cmpxchg8b_emu(void);
#define DECL_INDIRECT_THUNK(reg) \
extern asmlinkage void __x86_indirect_thunk_ ## reg (void);

+#define DECL_RETPOLINE(reg) \
+ extern asmlinkage void __x86_retpoline_ ## reg (void);
+
#undef GEN
#define GEN(reg) DECL_INDIRECT_THUNK(reg)
#include <asm/GEN-for-each-reg.h>

+#undef GEN
+#define GEN(reg) DECL_RETPOLINE(reg)
+#include <asm/GEN-for-each-reg.h>
+
#endif /* CONFIG_RETPOLINE */
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -13,15 +13,6 @@
#include <asm/unwind_hints.h>

/*
- * This should be used immediately before a retpoline alternative. It tells
- * objtool where the retpolines are so that it can make sense of the control
- * flow by just reading the original instruction(s) and ignoring the
- * alternatives.
- */
-#define ANNOTATE_NOSPEC_ALTERNATIVE \
- ANNOTATE_IGNORE_ALTERNATIVE
-
-/*
* Fill the CPU return stack buffer.
*
* Each entry in the RSB, if used for a speculative 'ret', contains an
@@ -83,44 +74,15 @@
.endm

/*
- * These are the bare retpoline primitives for indirect jmp and call.
- * Do not use these directly; they only exist to make the ALTERNATIVE
- * invocation below less ugly.
- */
-.macro RETPOLINE_JMP reg:req
- call .Ldo_rop_\@
-.Lspec_trap_\@:
- pause
- lfence
- jmp .Lspec_trap_\@
-.Ldo_rop_\@:
- mov \reg, (%_ASM_SP)
- ret
-.endm
-
-/*
- * This is a wrapper around RETPOLINE_JMP so the called function in reg
- * returns to the instruction after the macro.
- */
-.macro RETPOLINE_CALL reg:req
- jmp .Ldo_call_\@
-.Ldo_retpoline_jmp_\@:
- RETPOLINE_JMP \reg
-.Ldo_call_\@:
- call .Ldo_retpoline_jmp_\@
-.endm
-
-/*
* JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
* indirect jmp/call which may be susceptible to the Spectre variant 2
* attack.
*/
.macro JMP_NOSPEC reg:req
#ifdef CONFIG_RETPOLINE
- ANNOTATE_NOSPEC_ALTERNATIVE
- ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
- __stringify(RETPOLINE_JMP %\reg), X86_FEATURE_RETPOLINE,\
- __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_AMD
+ ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
+ __stringify(jmp __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \
+ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_AMD
#else
jmp *%\reg
#endif
@@ -128,10 +90,16 @@

.macro CALL_NOSPEC reg:req
#ifdef CONFIG_RETPOLINE
- ANNOTATE_NOSPEC_ALTERNATIVE
- ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *%\reg),\
- __stringify(RETPOLINE_CALL %\reg), X86_FEATURE_RETPOLINE,\
- __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_AMD
+ /*
+ * This cannot be ALTERNATIVE_2 like with JMP_NOSPEC, because ORC
+ * unwind data is alternative invariant and needs stack modifying
+ * instructions to be in the same place for all alternatives.
+ *
+ * IOW the CALL instruction must be at the same offset for all cases.
+ */
+ ALTERNATIVE "", "lfence", X86_FEATURE_RETPOLINE_AMD
+ ALTERNATIVE __stringify(ANNOTATE_RETPOLINE_SAFE; call *%\reg), \
+ __stringify(call __x86_retpoline_\reg), X86_FEATURE_RETPOLINE
#else
call *%\reg
#endif
@@ -165,16 +133,12 @@
* which is ensured when CONFIG_RETPOLINE is defined.
*/
# define CALL_NOSPEC \
- ANNOTATE_NOSPEC_ALTERNATIVE \
- ALTERNATIVE_2( \
- ANNOTATE_RETPOLINE_SAFE \
- "call *%[thunk_target]\n", \
- "call __x86_indirect_thunk_%V[thunk_target]\n", \
- X86_FEATURE_RETPOLINE, \
- "lfence;\n" \
- ANNOTATE_RETPOLINE_SAFE \
- "call *%[thunk_target]\n", \
- X86_FEATURE_RETPOLINE_AMD)
+ ALTERNATIVE("", "lfence", X86_FEATURE_RETPOLINE_AMD) \
+ ALTERNATIVE(ANNOTATE_RETPOLINE_SAFE \
+ "call *%[thunk_target]\n", \
+ "call __x86_indirect_thunk_%V[thunk_target]\n", \
+ X86_FEATURE_RETPOLINE)
+
# define THUNK_TARGET(addr) [thunk_target] "r" (addr)

#else /* CONFIG_X86_32 */
@@ -184,7 +148,6 @@
* here, anyway.
*/
# define CALL_NOSPEC \
- ANNOTATE_NOSPEC_ALTERNATIVE \
ALTERNATIVE_2( \
ANNOTATE_RETPOLINE_SAFE \
"call *%[thunk_target]\n", \
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -7,15 +7,31 @@
#include <asm/alternative-asm.h>
#include <asm/export.h>
#include <asm/nospec-branch.h>
+#include <asm/unwind_hints.h>
+#include <asm/frame.h>

.macro THUNK reg
.section .text.__x86.indirect_thunk

+ .align 32
SYM_FUNC_START(__x86_indirect_thunk_\reg)
- CFI_STARTPROC
- JMP_NOSPEC %\reg
- CFI_ENDPROC
+ JMP_NOSPEC \reg
SYM_FUNC_END(__x86_indirect_thunk_\reg)
+
+SYM_FUNC_START_NOALIGN(__x86_retpoline_\reg)
+ ANNOTATE_INTRA_FUNCTION_CALL
+ call .Ldo_rop_\@
+.Lspec_trap_\@:
+ UNWIND_HINT_EMPTY
+ pause
+ lfence
+ jmp .Lspec_trap_\@
+.Ldo_rop_\@:
+ mov %\reg, (%_ASM_SP)
+ UNWIND_HINT_RET_OFFSET
+ ret
+SYM_FUNC_END(__x86_retpoline_\reg)
+
.endm

/*
@@ -32,6 +48,7 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg)

#define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
#define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
+#define EXPORT_RETPOLINE(reg) __EXPORT_THUNK(__x86_retpoline_ ## reg)

#undef GEN
#define GEN(reg) THUNK reg
@@ -41,3 +58,6 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg)
#define GEN(reg) EXPORT_THUNK(reg)
#include <asm/GEN-for-each-reg.h>

+#undef GEN
+#define GEN(reg) EXPORT_RETPOLINE(reg)
+#include <asm/GEN-for-each-reg.h>