[PATCH 22/24] x86/entry: Implement and use do_paranoid_entry() and paranoid_exit()

From: Lai Jiangshan
Date: Tue Aug 31 2021 - 13:53:09 EST


From: Lai Jiangshan <laijs@xxxxxxxxxxxxxxxxx>

All the facilities are set in traps.c, so we can implement the major body
of paranoid_entry() in C as do_paranoid_entry() and the whole
paranoid_exit() in C.

paranoid_entry() needs to save two values which are added into the
struct ist_regs. And paranoid_exit() use them after the interrupt
is handled.

No functional change intended.

Signed-off-by: Lai Jiangshan <laijs@xxxxxxxxxxxxxxxxx>
---
arch/x86/entry/entry_64.S | 128 +++--------------------------------
arch/x86/entry/traps.c | 62 +++++++++++++++++
arch/x86/include/asm/traps.h | 22 ++++++
3 files changed, 92 insertions(+), 120 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 1ae10ca351f4..8b2e19e6c9e1 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -437,6 +437,7 @@ SYM_CODE_START(\asmsym)

call \cfunc

+ movq %rsp, %rdi /* ist_regs pointer */
call paranoid_exit
addq $IST_pt_regs, %rsp /* put %rsp back to pt_regs */
jmp restore_regs_and_return_to_kernel
@@ -516,6 +517,7 @@ SYM_CODE_START(\asmsym)
* identical to the stack in the IRET frame or the VC fall-back stack,
* so it is definitely mapped even with PTI enabled.
*/
+ movq %rsp, %rdi /* ist_regs pointer */
call paranoid_exit
addq $IST_pt_regs, %rsp /* put %rsp back to pt_regs */
jmp restore_regs_and_return_to_kernel
@@ -548,6 +550,7 @@ SYM_CODE_START(\asmsym)
movq $-1, ORIG_RAX(%rdi) /* no syscall to restart */
call \cfunc

+ movq %rsp, %rdi /* ist_regs pointer */
call paranoid_exit
addq $IST_pt_regs, %rsp /* put %rsp back to pt_regs */
jmp restore_regs_and_return_to_kernel
@@ -840,14 +843,8 @@ SYM_CODE_END(xen_failsafe_callback)
#endif /* CONFIG_XEN_PV */

/*
- * Save all registers in pt_regs. Return GSBASE related information
- * in EBX depending on the availability of the FSGSBASE instructions:
- *
- * FSGSBASE R/EBX
- * N 0 -> SWAPGS on exit
- * 1 -> no SWAPGS on exit
- *
- * Y GSBASE value at entry, must be restored in paranoid_exit
+ * Save all registers and addtional info in ist_regs.
+ * Switch CR3 and gsbase if needed.
*/
SYM_CODE_START_LOCAL(paranoid_entry)
UNWIND_HINT_FUNC
@@ -856,124 +853,14 @@ SYM_CODE_START_LOCAL(paranoid_entry)
movq RDI(%rsp), %rsi /* temporarily store the return address in %rsi */
movq %rdi, RDI(%rsp) /* put %rdi onto pt_regs */
subq $IST_pt_regs, %rsp /* reserve room for ist_regs */
+ movq %rsp, %rdi /* ist_regs pointer */
pushq %rsi /* put the return address onto the stack */
ENCODE_FRAME_POINTER 8+IST_pt_regs

- /*
- * Always stash CR3 in %r14. This value will be restored,
- * verbatim, at exit. Needed if paranoid_entry interrupted
- * another entry that already switched to the user CR3 value
- * but has not yet returned to userspace.
- *
- * This is also why CS (stashed in the "iret frame" by the
- * hardware at entry) can not be used: this may be a return
- * to kernel code, but with a user CR3 value.
- *
- * Switching CR3 does not depend on kernel GSBASE so it can
- * be done before switching to the kernel GSBASE. This is
- * required for FSGSBASE because the kernel GSBASE has to
- * be retrieved from a kernel internal table.
- */
- SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
-
- /*
- * Handling GSBASE depends on the availability of FSGSBASE.
- *
- * Without FSGSBASE the kernel enforces that negative GSBASE
- * values indicate kernel GSBASE. With FSGSBASE no assumptions
- * can be made about the GSBASE value when entering from user
- * space.
- */
- ALTERNATIVE "jmp .Lparanoid_entry_checkgs", "", X86_FEATURE_FSGSBASE
-
- /*
- * Read the current GSBASE and store it in %rbx unconditionally,
- * retrieve and set the current CPUs kernel GSBASE. The stored value
- * has to be restored in paranoid_exit unconditionally.
- *
- * The unconditional write to GS base below ensures that no subsequent
- * loads based on a mispredicted GS base can happen, therefore no LFENCE
- * is needed here.
- */
- SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx
- ret
-
-.Lparanoid_entry_checkgs:
- /* EBX = 1 -> kernel GSBASE active, no restore required */
- movl $1, %ebx
- /*
- * The kernel-enforced convention is a negative GSBASE indicates
- * a kernel value. No SWAPGS needed on entry and exit.
- */
- movl $MSR_GS_BASE, %ecx
- rdmsr
- testl %edx, %edx
- jns .Lparanoid_entry_swapgs
- ret
-
-.Lparanoid_entry_swapgs:
- swapgs
-
- /*
- * The above SAVE_AND_SWITCH_TO_KERNEL_CR3 macro doesn't do an
- * unconditional CR3 write, even in the PTI case. So do an lfence
- * to prevent GS speculation, regardless of whether PTI is enabled.
- */
- FENCE_SWAPGS_KERNEL_ENTRY
-
- /* EBX = 0 -> SWAPGS required on exit */
- xorl %ebx, %ebx
+ call do_paranoid_entry
ret
SYM_CODE_END(paranoid_entry)

-/*
- * "Paranoid" exit path from exception stack. This is invoked
- * only on return from IST interrupts that came from kernel space.
- *
- * We may be returning to very strange contexts (e.g. very early
- * in syscall entry), so checking for preemption here would
- * be complicated. Fortunately, there's no good reason to try
- * to handle preemption here.
- *
- * R/EBX contains the GSBASE related information depending on the
- * availability of the FSGSBASE instructions:
- *
- * FSGSBASE R/EBX
- * N 0 -> SWAPGS on exit
- * 1 -> no SWAPGS on exit
- *
- * Y User space GSBASE, must be restored unconditionally
- */
-SYM_CODE_START_LOCAL(paranoid_exit)
- UNWIND_HINT_REGS offset=8
- /*
- * The order of operations is important. RESTORE_CR3 requires
- * kernel GSBASE.
- *
- * NB to anyone to try to optimize this code: this code does
- * not execute at all for exceptions from user mode. Those
- * exceptions go through error_exit instead.
- */
- RESTORE_CR3 scratch_reg=%rax save_reg=%r14
-
- /* Handle the three GSBASE cases */
- ALTERNATIVE "jmp .Lparanoid_exit_checkgs", "", X86_FEATURE_FSGSBASE
-
- /* With FSGSBASE enabled, unconditionally restore GSBASE */
- wrgsbase %rbx
- ret
-
-.Lparanoid_exit_checkgs:
- /* On non-FSGSBASE systems, conditionally do SWAPGS */
- testl %ebx, %ebx
- jnz .Lparanoid_exit_done
-
- /* We are returning to a context with user GSBASE */
- swapgs
-.Lparanoid_exit_done:
- ret
-SYM_CODE_END(paranoid_exit)
-
/*
* Save all registers in pt_regs, and switch GS if needed.
*/
@@ -1308,6 +1195,7 @@ end_repeat_nmi:
* Use paranoid_exit to handle SWAPGS and CR3, but no need to use
* restore_regs_and_return_to_kernel as we must handle nested NMI.
*/
+ movq %rsp, %rdi /* ist_regs pointer */
call paranoid_exit
addq $IST_pt_regs, %rsp /* put %rsp back to pt_regs */

diff --git a/arch/x86/entry/traps.c b/arch/x86/entry/traps.c
index b5c92b4e0cb5..52511db6baa6 100644
--- a/arch/x86/entry/traps.c
+++ b/arch/x86/entry/traps.c
@@ -1029,6 +1029,68 @@ static __always_inline unsigned long ist_switch_to_kernel_gsbase(void)
/* SWAPGS required on exit */
return 0;
}
+
+asmlinkage __visible __entry_text
+void do_paranoid_entry(struct ist_regs *ist)
+{
+ /*
+ * Always stash CR3 in ist->cr3. This value will be restored,
+ * verbatim, at exit. Needed if paranoid_entry interrupted
+ * another entry that already switched to the user CR3 value
+ * but has not yet returned to userspace.
+ *
+ * This is also why CS (stashed in the "iret frame" by the
+ * hardware at entry) can not be used: this may be a return
+ * to kernel code, but with a user CR3 value.
+ *
+ * Switching CR3 does not depend on kernel GSBASE so it can
+ * be done before switching to the kernel GSBASE. This is
+ * required for FSGSBASE because the kernel GSBASE has to
+ * be retrieved from a kernel internal table.
+ */
+ ist->cr3 = ist_switch_to_kernel_cr3();
+
+ /* Handle GSBASE, store the return value in ist_regs for exit. */
+ ist->gsbase = ist_switch_to_kernel_gsbase();
+}
+
+/*
+ * "Paranoid" exit path from exception stack. This is invoked
+ * only on return from IST interrupts that came from kernel space.
+ *
+ * We may be returning to very strange contexts (e.g. very early
+ * in syscall entry), so checking for preemption here would
+ * be complicated. Fortunately, there's no good reason to try
+ * to handle preemption here.
+ */
+asmlinkage __visible __entry_text
+void paranoid_exit(struct ist_regs *ist)
+{
+ /*
+ * Restore cr3 at first, it can use kernel GSBASE.
+ */
+ ist_restore_cr3(ist->cr3);
+
+ /*
+ * Handle the three GSBASE cases.
+ *
+ * ist->gsbase contains the GSBASE related information depending
+ * on the availability of the FSGSBASE instructions:
+ *
+ * FSGSBASE ist->gsbase
+ * N 0 -> SWAPGS on exit
+ * 1 -> no SWAPGS on exit
+ *
+ * Y User space GSBASE, must be restored unconditionally
+ */
+ if (static_cpu_has(X86_FEATURE_FSGSBASE)) {
+ wrgsbase(ist->gsbase);
+ return;
+ }
+
+ if (!ist->gsbase)
+ native_swapgs();
+}
#endif

static bool is_sysenter_singlestep(struct pt_regs *regs)
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index e24c63bbc30a..0bc7117a01cd 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -12,6 +12,26 @@

#ifdef CONFIG_X86_64
struct ist_regs {
+ /*
+ * Always stash CR3 in cr3. This value will be restored,
+ * verbatim, at exit. Needed if paranoid_entry interrupted
+ * another entry that already switched to the user CR3 value
+ * but has not yet returned to userspace.
+ */
+ unsigned long cr3;
+
+ /*
+ * gsbase contains the GSBASE related information depending on the
+ * availability of the FSGSBASE instructions:
+ *
+ * FSGSBASE gsbase
+ * N 0 -> SWAPGS on exit
+ * 1 -> no SWAPGS on exit
+ *
+ * Y User space GSBASE, must be restored unconditionally
+ */
+ unsigned long gsbase;
+
/*
* ist specific fields must be defined before pt_regs
* and they are located below pt_regs on the stacks.
@@ -20,6 +40,8 @@ struct ist_regs {
};

asmlinkage __visible notrace struct pt_regs *do_error_entry(struct pt_regs *eregs);
+asmlinkage __visible notrace void do_paranoid_entry(struct ist_regs *ist);
+asmlinkage __visible notrace void paranoid_exit(struct ist_regs *ist);
void __init trap_init(void);
asmlinkage __visible noinstr struct ist_regs *vc_switch_off_ist(struct ist_regs *ist);
#endif
--
2.19.1.6.gb485710b