[GIT pull] x86/pti fixes for 4.15

From: Thomas Gleixner
Date: Sun Jan 21 2018 - 06:45:36 EST


Linus,

please pull the latest x86-pti-for-linus git tree from:

git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-pti-for-linus

A small set of fixes for the meltdown/spectre mitigations:

- Make kprobes aware of retpolines to prevent probes in the retpoline
thunks.

- Make the machine check exception speculation protected. MCE used to
issue an indirect call directly from the ASM entry code. Convert that to
a direct call into a C-function and issue the indirect call from there
so the compiler can add the retpoline protection,

- Make the vmexit_fill_RSB() assembly less stupid

- Fix a typo in the PTI documentation

Thanks,

tglx

------------------>
Andi Kleen (1):
x86/retpoline: Optimize inline assembler for vmexit_fill_RSB

Masami Hiramatsu (3):
retpoline: Introduce start/end markers of indirect thunk
kprobes/x86: Blacklist indirect thunk functions for kprobes
kprobes/x86: Disable optimizing on the function jumps to indirect thunk

Thomas Gleixner (1):
x86/mce: Make machine check speculation protected

zhenwei.pi (1):
x86/pti: Document fix wrong index


Documentation/x86/pti.txt | 2 +-
arch/x86/entry/entry_64.S | 2 +-
arch/x86/include/asm/nospec-branch.h | 10 +++++++---
arch/x86/include/asm/traps.h | 1 +
arch/x86/kernel/cpu/mcheck/mce.c | 5 +++++
arch/x86/kernel/kprobes/opt.c | 23 ++++++++++++++++++++++-
arch/x86/kernel/vmlinux.lds.S | 6 ++++++
arch/x86/lib/retpoline.S | 5 +++--
8 files changed, 46 insertions(+), 8 deletions(-)

diff --git a/Documentation/x86/pti.txt b/Documentation/x86/pti.txt
index d11eff61fc9a..5cd58439ad2d 100644
--- a/Documentation/x86/pti.txt
+++ b/Documentation/x86/pti.txt
@@ -78,7 +78,7 @@ this protection comes at a cost:
non-PTI SYSCALL entry code, so requires mapping fewer
things into the userspace page tables. The downside is
that stacks must be switched at entry time.
- d. Global pages are disabled for all kernel structures not
+ c. Global pages are disabled for all kernel structures not
mapped into both kernel and userspace page tables. This
feature of the MMU allows different processes to share TLB
entries mapping the kernel. Losing the feature means more
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index d54a0ede61d1..63f4320602a3 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1258,7 +1258,7 @@ idtentry async_page_fault do_async_page_fault has_error_code=1
#endif

#ifdef CONFIG_X86_MCE
-idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(%rip)
+idtentry machine_check do_mce has_error_code=0 paranoid=1
#endif

/*
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 7b45d8424150..4ad41087ce0e 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -194,6 +194,9 @@ enum spectre_v2_mitigation {
SPECTRE_V2_IBRS,
};

+extern char __indirect_thunk_start[];
+extern char __indirect_thunk_end[];
+
/*
* On VMEXIT we must ensure that no RSB predictions learned in the guest
* can be followed in the host, by overwriting the RSB completely. Both
@@ -203,16 +206,17 @@ enum spectre_v2_mitigation {
static inline void vmexit_fill_RSB(void)
{
#ifdef CONFIG_RETPOLINE
- unsigned long loops = RSB_CLEAR_LOOPS / 2;
+ unsigned long loops;

asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
ALTERNATIVE("jmp 910f",
__stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
X86_FEATURE_RETPOLINE)
"910:"
- : "=&r" (loops), ASM_CALL_CONSTRAINT
- : "r" (loops) : "memory" );
+ : "=r" (loops), ASM_CALL_CONSTRAINT
+ : : "memory" );
#endif
}
+
#endif /* __ASSEMBLY__ */
#endif /* __NOSPEC_BRANCH_H__ */
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 31051f35cbb7..3de69330e6c5 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -88,6 +88,7 @@ dotraplinkage void do_simd_coprocessor_error(struct pt_regs *, long);
#ifdef CONFIG_X86_32
dotraplinkage void do_iret_error(struct pt_regs *, long);
#endif
+dotraplinkage void do_mce(struct pt_regs *, long);

static inline int get_si_code(unsigned long condition)
{
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 3b413065c613..a9e898b71208 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1788,6 +1788,11 @@ static void unexpected_machine_check(struct pt_regs *regs, long error_code)
void (*machine_check_vector)(struct pt_regs *, long error_code) =
unexpected_machine_check;

+dotraplinkage void do_mce(struct pt_regs *regs, long error_code)
+{
+ machine_check_vector(regs, error_code);
+}
+
/*
* Called for each booted CPU to set up machine checks.
* Must be called with preempt off:
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 4f98aad38237..3668f28cf5fc 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -40,6 +40,7 @@
#include <asm/debugreg.h>
#include <asm/set_memory.h>
#include <asm/sections.h>
+#include <asm/nospec-branch.h>

#include "common.h"

@@ -205,7 +206,7 @@ static int copy_optimized_instructions(u8 *dest, u8 *src)
}

/* Check whether insn is indirect jump */
-static int insn_is_indirect_jump(struct insn *insn)
+static int __insn_is_indirect_jump(struct insn *insn)
{
return ((insn->opcode.bytes[0] == 0xff &&
(X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
@@ -239,6 +240,26 @@ static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
return (start <= target && target <= start + len);
}

+static int insn_is_indirect_jump(struct insn *insn)
+{
+ int ret = __insn_is_indirect_jump(insn);
+
+#ifdef CONFIG_RETPOLINE
+ /*
+ * Jump to x86_indirect_thunk_* is treated as an indirect jump.
+ * Note that even with CONFIG_RETPOLINE=y, the kernel compiled with
+ * older gcc may use indirect jump. So we add this check instead of
+ * replace indirect-jump check.
+ */
+ if (!ret)
+ ret = insn_jump_into_range(insn,
+ (unsigned long)__indirect_thunk_start,
+ (unsigned long)__indirect_thunk_end -
+ (unsigned long)__indirect_thunk_start);
+#endif
+ return ret;
+}
+
/* Decode whole function to ensure any instructions don't jump into target */
static int can_optimize(unsigned long paddr)
{
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 1e413a9326aa..9b138a06c1a4 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -124,6 +124,12 @@ SECTIONS
ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big");
#endif

+#ifdef CONFIG_RETPOLINE
+ __indirect_thunk_start = .;
+ *(.text.__x86.indirect_thunk)
+ __indirect_thunk_end = .;
+#endif
+
/* End of text section */
_etext = .;
} :text = 0x9090
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index cb45c6cb465f..dfb2ba91b670 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -9,7 +9,7 @@
#include <asm/nospec-branch.h>

.macro THUNK reg
- .section .text.__x86.indirect_thunk.\reg
+ .section .text.__x86.indirect_thunk

ENTRY(__x86_indirect_thunk_\reg)
CFI_STARTPROC
@@ -25,7 +25,8 @@ ENDPROC(__x86_indirect_thunk_\reg)
* than one per register with the correct names. So we do it
* the simple and nasty way...
*/
-#define EXPORT_THUNK(reg) EXPORT_SYMBOL(__x86_indirect_thunk_ ## reg)
+#define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
+#define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
#define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg)

GENERATE_THUNK(_ASM_AX)