[PATCH V3 1/3] x86/entry: avoid calling into sync_regs() when entering from userspace

From: Lai Jiangshan
Date: Mon Aug 17 2020 - 01:23:49 EST


From: Lai Jiangshan <laijs@xxxxxxxxxxxxxxxxx>

Commit 7f2590a110b8 ("x86/entry/64: Use a per-CPU trampoline stack for
IDT entries") changed the entry code so that, when any exception
happens in userspace, it saves the pt_regs on the sp0 stack, then
copies them to the thread stack via sync_regs() and switches to the
thread stack afterward.

The recent x86/entry work makes interrupts use idtentry as well, so
all the interrupt code now also saves the pt_regs on the sp0 stack
and then copies them to the thread stack, just like exceptions.

This is a hot path (page faults, IPIs), so such overhead should be
avoided. This patch borrows the approach the original interrupt_entry
used: when entering from userspace, it switches to the thread stack
directly, right away.

Signed-off-by: Lai Jiangshan <laijs@xxxxxxxxxxxxxxxxx>
---
arch/x86/entry/entry_64.S | 43 +++++++++++++++++++++++++++++++--------
1 file changed, 34 insertions(+), 9 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 70dea9337816..1a7715430da3 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -928,19 +928,42 @@ SYM_CODE_END(paranoid_exit)
SYM_CODE_START_LOCAL(error_entry)
UNWIND_HINT_FUNC
cld
- PUSH_AND_CLEAR_REGS save_ret=1
- ENCODE_FRAME_POINTER 8
- testb $3, CS+8(%rsp)
+ testb $3, CS-ORIG_RAX+8(%rsp)
jz .Lerror_kernelspace

- /*
- * We entered from user mode or we're pretending to have entered
- * from user mode due to an IRET fault.
- */
SWAPGS
FENCE_SWAPGS_USER_ENTRY
- /* We have user CR3. Change to kernel CR3. */
- SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+ /*
+ * Switch to the thread stack. The IRET frame and orig_ax are
+ * on the stack, as well as the return address. RDI..R12 are
+ * not (yet) on the stack and space has not (yet) been
+ * allocated for them.
+ */
+ pushq %rdx
+
+ /* Need to switch before accessing the thread stack. */
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx
+ movq %rsp, %rdx
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+
+ /*
+ * We have RDX, return address, and orig_ax on the stack on
+ * top of the IRET frame. That means offset=24
+ */
+ UNWIND_HINT_IRET_REGS base=%rdx offset=24
+
+ pushq 7*8(%rdx) /* regs->ss */
+ pushq 6*8(%rdx) /* regs->rsp */
+ pushq 5*8(%rdx) /* regs->eflags */
+ pushq 4*8(%rdx) /* regs->cs */
+ pushq 3*8(%rdx) /* regs->ip */
+ pushq 2*8(%rdx) /* regs->orig_ax */
+ pushq 8(%rdx) /* return address */
+ UNWIND_HINT_FUNC
+
+ PUSH_AND_CLEAR_REGS rdx=(%rdx), save_ret=1
+ ENCODE_FRAME_POINTER 8
+ ret

.Lerror_entry_from_usermode_after_swapgs:
/* Put us onto the real thread stack. */
@@ -964,6 +987,8 @@ SYM_CODE_START_LOCAL(error_entry)
* for these here too.
*/
.Lerror_kernelspace:
+ PUSH_AND_CLEAR_REGS save_ret=1
+ ENCODE_FRAME_POINTER 8
leaq native_irq_return_iret(%rip), %rcx
cmpq %rcx, RIP+8(%rsp)
je .Lerror_bad_iret
--
2.19.1.6.gb485710b