[PATCH v11 09/12] x86/smpboot: Remove initial_stack on 64-bit

From: Usama Arif
Date: Thu Feb 23 2023 - 14:13:49 EST


From: Brian Gerst <brgerst@xxxxxxxxx>

Load RSP from current_task->thread.sp instead.

Signed-off-by: Brian Gerst <brgerst@xxxxxxxxx>
Reviewed-by: David Woodhouse <dwmw@xxxxxxxxxxxx>
Signed-off-by: David Woodhouse <dwmw@xxxxxxxxxxxx>
Tested-by: Usama Arif <usama.arif@xxxxxxxxxxxxx>
Signed-off-by: Usama Arif <usama.arif@xxxxxxxxxxxxx>
---
arch/x86/include/asm/processor.h | 6 +++++-
arch/x86/kernel/acpi/sleep.c | 2 +-
arch/x86/kernel/head_64.S | 35 ++++++++++++++++++--------------
arch/x86/xen/xen-head.S | 2 +-
4 files changed, 27 insertions(+), 18 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 4e35c66edeb7..9c4a5c4d46c1 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -648,7 +648,11 @@ static inline void spin_lock_prefetch(const void *x)
#define KSTK_ESP(task) (task_pt_regs(task)->sp)

#else
-#define INIT_THREAD { }
+extern unsigned long __end_init_task[];
+
+#define INIT_THREAD { \
+ .sp = (unsigned long)&__end_init_task - PTREGS_SIZE, \
+}

extern unsigned long KSTK_ESP(struct task_struct *task);

diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 47e75c056cb5..008fda8b1982 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -113,7 +113,7 @@ int x86_acpi_suspend_lowlevel(void)
saved_magic = 0x12345678;
#else /* CONFIG_64BIT */
#ifdef CONFIG_SMP
- initial_stack = (unsigned long)temp_stack + sizeof(temp_stack);
+ current->thread.sp = (unsigned long)temp_stack + sizeof(temp_stack);
early_gdt_descr.address =
(unsigned long)get_cpu_gdt_rw(smp_processor_id());
initial_gs = per_cpu_offset(smp_processor_id());
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index c32e5b06a9ce..f7905ba4b992 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -62,8 +62,8 @@ SYM_CODE_START_NOALIGN(startup_64)
* tables and then reload them.
*/

- /* Set up the stack for verify_cpu(), similar to initial_stack below */
- leaq (__end_init_task - FRAME_SIZE)(%rip), %rsp
+ /* Set up the stack for verify_cpu() */
+ leaq (__end_init_task - PTREGS_SIZE)(%rip), %rsp

leaq _text(%rip), %rdi

@@ -245,11 +245,11 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
#ifdef CONFIG_SMP
/*
* Is this the boot CPU coming up? If so everything is available
- * in initial_gs, initial_stack and early_gdt_descr.
+ * in initial_gs and early_gdt_descr.
*/
movl smpboot_control(%rip), %edx
testl $STARTUP_SECONDARY, %edx
- jz .Lsetup_cpu
+ jz .Linit_cpu0_data

/*
* For parallel boot, the APIC ID is retrieved from CPUID, and then
@@ -302,6 +302,10 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
hlt
jmp 1b

+.Linit_cpu0_data:
+ movq __per_cpu_offset(%rip), %rdx
+ jmp .Lsetup_cpu
+
.Linit_cpu_data:
/* Get the per cpu offset for the given CPU# which is in ECX */
leaq __per_cpu_offset(%rip), %rbx
@@ -314,13 +318,21 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
addq %rbx, %rcx
movq %rcx, early_gdt_descr_base(%rip)

- /* Find the idle task stack */
- movq pcpu_hot + X86_current_task(%rbx), %rcx
- movq TASK_threadsp(%rcx), %rcx
- movq %rcx, initial_stack(%rip)
+ movq %rbx, %rdx
+#else
+ xorl %edx, %edx
#endif /* CONFIG_SMP */

.Lsetup_cpu:
+ /*
+ * Setup a boot time stack - Any secondary CPU will have lost its stack
+ * by now because the cr3-switch above unmaps the real-mode stack
+ *
+ * RDX contains the per-cpu offset
+ */
+ movq pcpu_hot + X86_current_task(%rdx), %rax
+ movq TASK_threadsp(%rax), %rsp
+
/*
* We must switch to a new descriptor in kernel space for the GDT
* because soon the kernel won't have access anymore to the userspace
@@ -355,12 +367,6 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
movl initial_gs+4(%rip),%edx
wrmsr

- /*
- * Setup a boot time stack - Any secondary CPU will have lost its stack
- * by now because the cr3-switch above unmaps the real-mode stack
- */
- movq initial_stack(%rip), %rsp
-
/* Drop the realmode protection. For the boot CPU the pointer is NULL! */
movq trampoline_lock(%rip), %rax
testq %rax, %rax
@@ -517,7 +523,6 @@ SYM_DATA(initial_vc_handler, .quad handle_vc_boot_ghcb)
* The FRAME_SIZE gap is a convention which helps the in-kernel unwinder
* reliably detect the end of the stack.
*/
-SYM_DATA(initial_stack, .quad init_thread_union + THREAD_SIZE - FRAME_SIZE)
SYM_DATA(trampoline_lock, .quad 0);
__FINITDATA

diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index ffaa62167f6e..6bd391476656 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -49,7 +49,7 @@ SYM_CODE_START(startup_xen)
ANNOTATE_NOENDBR
cld

- mov initial_stack(%rip), %rsp
+ leaq (__end_init_task - PTREGS_SIZE)(%rip), %rsp

/* Set up %gs.
*
--
2.25.1