[PATCH 1/6] x86/entry: Assert that syscalls are on the right stack

From: Andy Lutomirski
Date: Fri Jun 26 2020 - 13:21:25 EST


Now that the entry stack is a full page, it's too easy to regress
the system call entry code and end up on the wrong stack without
noticing. Assert that all system calls (SYSCALL64, SYSCALL32,
SYSENTER, and INT80) are on the right stack and have pt_regs in the
right place.

Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxx>
---
arch/x86/entry/common.c | 18 +++++++++++++++---
1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index bd3f14175193..ed8ccc820995 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -45,6 +45,15 @@
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>

+/* Check that the stack and regs on entry from user mode are sane. */
+static void check_user_regs(struct pt_regs *regs)
+{
+ if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) {
+ WARN_ON_ONCE(!on_thread_stack());
+ WARN_ON_ONCE(regs != task_pt_regs(current));
+ }
+}
+
#ifdef CONFIG_CONTEXT_TRACKING
/**
* enter_from_user_mode - Establish state when coming from user mode
@@ -127,9 +136,6 @@ static long syscall_trace_enter(struct pt_regs *regs)
unsigned long ret = 0;
u32 work;

- if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
- BUG_ON(regs != task_pt_regs(current));
-
work = READ_ONCE(ti->flags);

if (work & (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU)) {
@@ -346,6 +352,8 @@ __visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
{
struct thread_info *ti;

+ check_user_regs(regs);
+
enter_from_user_mode();
instrumentation_begin();

@@ -409,6 +417,8 @@ static void do_syscall_32_irqs_on(struct pt_regs *regs)
/* Handles int $0x80 */
__visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
{
+ check_user_regs(regs);
+
enter_from_user_mode();
instrumentation_begin();

@@ -460,6 +470,8 @@ __visible noinstr long do_fast_syscall_32(struct pt_regs *regs)
vdso_image_32.sym_int80_landing_pad;
bool success;

+ check_user_regs(regs);
+
/*
* SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward
* so that 'regs->ip -= 2' lands back on an int $0x80 instruction.
--
2.25.4