[PATCH] x86/nospec: Unwreck the RSB stuffing

From: Peter Zijlstra
Date: Tue Aug 16 2022 - 08:29:11 EST



Replying here, because obviously there's no actual posting of this
patch... :/

> --- a/arch/x86/include/asm/nospec-branch.h
> +++ b/arch/x86/include/asm/nospec-branch.h
> @@ -118,13 +118,28 @@
> #endif
> .endm
>
> +.macro ISSUE_UNBALANCED_RET_GUARD
> + ANNOTATE_INTRA_FUNCTION_CALL
> + call .Lunbalanced_ret_guard_\@
> + int3
> +.Lunbalanced_ret_guard_\@:
> + add $(BITS_PER_LONG/8), %_ASM_SP
> + lfence
> +.endm
> +
> /*
> * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
> * monstrosity above, manually.
> */
> -.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
> +.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2
> +.ifb \ftr2
> ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr
> +.else
> + ALTERNATIVE_2 "jmp .Lskip_rsb_\@", "", \ftr, "jmp .Lunbalanced_\@", \ftr2
> +.endif
> __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)
> +.Lunbalanced_\@:
> + ISSUE_UNBALANCED_RET_GUARD
> .Lskip_rsb_\@:
> .endm

(/me deletes all the swear words and starts over)

This must absolutely be the most horrible patch you could come up with,
no? I suppose that's the price of me taking PTO :-(

Could you please test this; I've only compiled it.

---
Subject: x86/nospec: Unwreck the RSB stuffing

Commit 2b1299322016 ("x86/speculation: Add RSB VM Exit protections")
made a right mess of the RSB stuffing, rewrite the whole thing to not
suck.

Thanks to Andrew for the enlightening comment about Post-Barrier RSB
things so we can make this code less magical.

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
cpufeatures.h | 2 +
nospec-branch.h | 80 +++++++++++++++++++++++++++-----------------------------
2 files changed, 41 insertions(+), 41 deletions(-)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 235dc85c91c3..1a31ae6d758b 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -420,6 +420,8 @@
#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */
#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */

+#define X86_FEATURE_NEVER (-1) /* "" Logical complement of ALWAYS */
+
/*
* BUG word(s)
*/
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index e64fd20778b6..336f8e8cebf8 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -35,33 +35,44 @@
#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */

/*
+ * Common helper for __FILL_RETURN_BUFFER and __FILL_ONE_RETURN.
+ */
+#define __FILL_RETURN_SLOT \
+ ANNOTATE_INTRA_FUNCTION_CALL; \
+ call 772f; \
+ int3; \
+772:
+
+/*
+ * Stuff the entire RSB.
+ *
* Google experimented with loop-unrolling and this turned out to be
* the optimal version - two calls, each with their own speculation
* trap should their return address end up getting used, in a loop.
*/
-#define __FILL_RETURN_BUFFER(reg, nr, sp) \
- mov $(nr/2), reg; \
-771: \
- ANNOTATE_INTRA_FUNCTION_CALL; \
- call 772f; \
-773: /* speculation trap */ \
- UNWIND_HINT_EMPTY; \
- pause; \
- lfence; \
- jmp 773b; \
-772: \
- ANNOTATE_INTRA_FUNCTION_CALL; \
- call 774f; \
-775: /* speculation trap */ \
- UNWIND_HINT_EMPTY; \
- pause; \
- lfence; \
- jmp 775b; \
-774: \
- add $(BITS_PER_LONG/8) * 2, sp; \
- dec reg; \
- jnz 771b; \
- /* barrier for jnz misprediction */ \
+#define __FILL_RETURN_BUFFER(reg, nr) \
+ mov $(nr/2), reg; \
+771: \
+ __FILL_RETURN_SLOT \
+ __FILL_RETURN_SLOT \
+ add $(BITS_PER_LONG/8) * 2, %_ASM_SP; \
+ dec reg; \
+ jnz 771b; \
+ /* barrier for jnz misprediction */ \
+ lfence;
+
+/*
+ * Stuff a single RSB slot.
+ *
+ * To mitigate Post-Barrier RSB speculation, one CALL instruction must be
+ * forced to retire before letting a RET instruction execute.
+ *
+ * On PBRSB-vulnerable CPUs, it is not safe for a RET to be executed
+ * before this point.
+ */
+#define __FILL_ONE_RETURN \
+ __FILL_RETURN_SLOT \
+ add $(BITS_PER_LONG/8), %_ASM_SP; \
lfence;

#ifdef __ASSEMBLY__
@@ -132,28 +143,15 @@
#endif
.endm

-.macro ISSUE_UNBALANCED_RET_GUARD
- ANNOTATE_INTRA_FUNCTION_CALL
- call .Lunbalanced_ret_guard_\@
- int3
-.Lunbalanced_ret_guard_\@:
- add $(BITS_PER_LONG/8), %_ASM_SP
- lfence
-.endm
-
/*
* A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
* monstrosity above, manually.
*/
-.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2
-.ifb \ftr2
- ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr
-.else
- ALTERNATIVE_2 "jmp .Lskip_rsb_\@", "", \ftr, "jmp .Lunbalanced_\@", \ftr2
-.endif
- __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)
-.Lunbalanced_\@:
- ISSUE_UNBALANCED_RET_GUARD
+.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2=X86_FEATURE_NEVER
+ ALTERNATIVE_2 "jmp .Lskip_rsb_\@", \
+ __stringify(__FILL_RETURN_BUFFER(\reg,\nr)), \ftr, \
+ __stringify(__FILL_ONE_RETURN), \ftr2
+
.Lskip_rsb_\@:
.endm