[PATCH 2/2] x86/entry: Do enter_from_user_mode with IRQs off

From: Andy Lutomirski
Date: Sun Mar 06 2016 - 21:30:30 EST


Now that slow-path syscalls always enter C before enabling
interrupts, it's straightforward to do enter_from_user_mode before
enabling interrupts rather than doing it as part of entry tracing.

With this change, we should finally be able to retire
exception_enter.

This will also enable optimizations based on knowing that we never
change context tracking state with interrupts on.

Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxx>
---
arch/x86/entry/common.c | 39 ++++++++++++++------------------------
arch/x86/include/asm/thread_info.h | 5 ++++-
2 files changed, 18 insertions(+), 26 deletions(-)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 9ef3258320ec..8e8ca0eb36be 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -38,14 +38,17 @@ static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs)
return (struct thread_info *)(top_of_stack - THREAD_SIZE);
}

-#ifdef CONFIG_CONTEXT_TRACKING
+#ifndef CONFIG_CONTEXT_TRACKING
+static
+#else
+__visible
+#endif
/* Called on entry from user mode with IRQs off. */
-__visible void enter_from_user_mode(void)
+void enter_from_user_mode(void)
{
CT_WARN_ON(ct_state() != CONTEXT_USER);
user_exit();
}
-#endif

static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
{
@@ -85,17 +88,6 @@ unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)

work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY;

-#ifdef CONFIG_CONTEXT_TRACKING
- /*
- * If TIF_NOHZ is set, we are required to call user_exit() before
- * doing anything that could touch RCU.
- */
- if (work & _TIF_NOHZ) {
- enter_from_user_mode();
- work &= ~_TIF_NOHZ;
- }
-#endif
-
#ifdef CONFIG_SECCOMP
/*
* Do seccomp first -- it should minimize exposure of other
@@ -354,6 +346,7 @@ __visible void do_syscall_64(struct pt_regs *regs)
struct thread_info *ti = pt_regs_to_thread_info(regs);
unsigned long nr = regs->orig_ax;

+ enter_from_user_mode();
local_irq_enable();

if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY)
@@ -376,9 +369,9 @@ __visible void do_syscall_64(struct pt_regs *regs)

#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
/*
- * Does a 32-bit syscall. Called with IRQs on and does all entry and
- * exit work and returns with IRQs off. This function is extremely hot
- * in workloads that use it, and it's usually called from
+ * Does a 32-bit syscall. Called with IRQs on in CONTEXT_KERNEL. Does
+ * all entry and exit work and returns with IRQs off. This function is
+ * extremely hot in workloads that use it, and it's usually called from
* do_fast_syscall_32, so forcibly inline it to improve performance.
*/
static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
@@ -419,6 +412,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
/* Handles int $0x80 */
__visible void do_int80_syscall_32(struct pt_regs *regs)
{
+ enter_from_user_mode();
local_irq_enable();
do_syscall_32_irqs_on(regs);
}
@@ -441,11 +435,9 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
*/
regs->ip = landing_pad;

- /*
- * Fetch EBP from where the vDSO stashed it.
- *
- * WARNING: We are in CONTEXT_USER and RCU isn't paying attention!
- */
+ enter_from_user_mode();
+
+ /* Fetch EBP from where the vDSO stashed it. */
local_irq_enable();
if (
#ifdef CONFIG_X86_64
@@ -464,9 +456,6 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
/* User code screwed up. */
local_irq_disable();
regs->ax = -EFAULT;
-#ifdef CONFIG_CONTEXT_TRACKING
- enter_from_user_mode();
-#endif
prepare_exit_to_usermode(regs);
return 0; /* Keep it simple: use IRET. */
}
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index c0778fcab06d..fbdb24e61835 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -134,7 +134,10 @@ struct thread_info {
#define _TIF_ADDR32 (1 << TIF_ADDR32)
#define _TIF_X32 (1 << TIF_X32)

-/* work to do in syscall_trace_enter() */
+/*
+ * work to do in syscall_trace_enter(). Also includes TIF_NOHZ for
+ * enter_from_user_mode()
+ */
#define _TIF_WORK_SYSCALL_ENTRY \
(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \
_TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT | \
--
2.5.0