[GIT pull] x86/asm for 5.1

From: Thomas Gleixner
Date: Sun Mar 10 2019 - 07:34:20 EST


Linus,

please pull the latest x86-asm-for-linus git tree from:

git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-asm-for-linus

The following updates for 5.1 are available:

- Add pinning of sensitive CR0/CR4 bits, i.e. WP and SMAP/SMEP as these
have been targets of recent exploits. This includes a lktdm test.

- Remove unused macros/inlines and obsolete __GNUC__ conditionals.

Thanks,

tglx

------------------>
Kees Cook (4):
x86/asm: Pin sensitive CR4 bits
x86/asm: Avoid taking an exception before cr4 restore
x86/asm: Pin sensitive CR0 bits
lkdtm: Check for SMEP clearing protections

Rasmus Villemoes (2):
x86/asm: Remove dead __GNUC__ conditionals
x86/asm: Remove unused __constant_c_x_memset() macro and inlines


arch/x86/include/asm/bitops.h | 6 --
arch/x86/include/asm/special_insns.h | 43 ++++++++++++++-
arch/x86/include/asm/string_32.h | 104 -----------------------------------
arch/x86/include/asm/string_64.h | 15 -----
arch/x86/kernel/cpu/common.c | 12 +++-
drivers/misc/lkdtm/bugs.c | 61 ++++++++++++++++++++
drivers/misc/lkdtm/core.c | 1 +
drivers/misc/lkdtm/lkdtm.h | 1 +
8 files changed, 116 insertions(+), 127 deletions(-)

diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index ad7b210aa3f6..d153d570bb04 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -36,13 +36,7 @@
* bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
*/

-#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1)
-/* Technically wrong, but this avoids compilation errors on some gcc
- versions. */
-#define BITOP_ADDR(x) "=m" (*(volatile long *) (x))
-#else
#define BITOP_ADDR(x) "+m" (*(volatile long *) (x))
-#endif

#define ADDR BITOP_ADDR(addr)

diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 43c029cdc3fe..7fa4fe880395 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -5,6 +5,7 @@

#ifdef __KERNEL__

+#include <asm/processor-flags.h>
#include <asm/nops.h>

/*
@@ -25,7 +26,28 @@ static inline unsigned long native_read_cr0(void)

static inline void native_write_cr0(unsigned long val)
{
- asm volatile("mov %0,%%cr0": : "r" (val), "m" (__force_order));
+ bool warn = false;
+
+again:
+ val |= X86_CR0_WP;
+ /*
+ * In order to have the compiler not optimize away the check
+ * after the cr4 write, mark "val" as being also an output ("+r")
+ * by this asm() block so it will perform an explicit check, as
+ * if it were "volatile".
+ */
+ asm volatile("mov %0,%%cr0" : "+r" (val) : "m" (__force_order) : );
+ /*
+ * If the MOV above was used directly as a ROP gadget we can
+ * notice the lack of pinned bits in "val" and start the function
+ * from the beginning to gain the WP bit for sure. And do it
+ * without first taking the exception for a WARN().
+ */
+ if ((val & X86_CR0_WP) != X86_CR0_WP) {
+ warn = true;
+ goto again;
+ }
+ WARN_ONCE(warn, "Attempt to unpin X86_CR0_WP, cr0 bypass attack?!\n");
}

static inline unsigned long native_read_cr2(void)
@@ -72,9 +94,28 @@ static inline unsigned long native_read_cr4(void)
return val;
}

+extern volatile unsigned long cr4_pin;
+
static inline void native_write_cr4(unsigned long val)
{
+ unsigned long warn = 0;
+
+again:
+ val |= cr4_pin;
asm volatile("mov %0,%%cr4": : "r" (val), "m" (__force_order));
+ /*
+ * If the MOV above was used directly as a ROP gadget we can
+ * notice the lack of pinned bits in "val" and start the function
+ * from the beginning to gain the cr4_pin bits for sure. Note
+ * that "val" must be volatile to keep the compiler from
+ * optimizing away this check.
+ */
+ if ((val & cr4_pin) != cr4_pin) {
+ warn = ~val & cr4_pin;
+ goto again;
+ }
+ WARN_ONCE(warn, "Attempt to unpin cr4 bits: %lx; bypass attack?!\n",
+ warn);
}

#ifdef CONFIG_X86_64
diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h
index 55d392c6bd29..f74362b05619 100644
--- a/arch/x86/include/asm/string_32.h
+++ b/arch/x86/include/asm/string_32.h
@@ -179,14 +179,7 @@ static inline void *__memcpy3d(void *to, const void *from, size_t len)
* No 3D Now!
*/

-#if (__GNUC__ >= 4)
#define memcpy(t, f, n) __builtin_memcpy(t, f, n)
-#else
-#define memcpy(t, f, n) \
- (__builtin_constant_p((n)) \
- ? __constant_memcpy((t), (f), (n)) \
- : __memcpy((t), (f), (n)))
-#endif

#endif
#endif /* !CONFIG_FORTIFY_SOURCE */
@@ -216,29 +209,6 @@ static inline void *__memset_generic(void *s, char c, size_t count)
/* we might want to write optimized versions of these later */
#define __constant_count_memset(s, c, count) __memset_generic((s), (c), (count))

-/*
- * memset(x, 0, y) is a reasonably common thing to do, so we want to fill
- * things 32 bits at a time even when we don't know the size of the
- * area at compile-time..
- */
-static __always_inline
-void *__constant_c_memset(void *s, unsigned long c, size_t count)
-{
- int d0, d1;
- asm volatile("rep ; stosl\n\t"
- "testb $2,%b3\n\t"
- "je 1f\n\t"
- "stosw\n"
- "1:\ttestb $1,%b3\n\t"
- "je 2f\n\t"
- "stosb\n"
- "2:"
- : "=&c" (d0), "=&D" (d1)
- : "a" (c), "q" (count), "0" (count/4), "1" ((long)s)
- : "memory");
- return s;
-}
-
/* Added by Gertjan van Wingerde to make minix and sysv module work */
#define __HAVE_ARCH_STRNLEN
extern size_t strnlen(const char *s, size_t count);
@@ -247,72 +217,6 @@ extern size_t strnlen(const char *s, size_t count);
#define __HAVE_ARCH_STRSTR
extern char *strstr(const char *cs, const char *ct);

-/*
- * This looks horribly ugly, but the compiler can optimize it totally,
- * as we by now know that both pattern and count is constant..
- */
-static __always_inline
-void *__constant_c_and_count_memset(void *s, unsigned long pattern,
- size_t count)
-{
- switch (count) {
- case 0:
- return s;
- case 1:
- *(unsigned char *)s = pattern & 0xff;
- return s;
- case 2:
- *(unsigned short *)s = pattern & 0xffff;
- return s;
- case 3:
- *(unsigned short *)s = pattern & 0xffff;
- *((unsigned char *)s + 2) = pattern & 0xff;
- return s;
- case 4:
- *(unsigned long *)s = pattern;
- return s;
- }
-
-#define COMMON(x) \
- asm volatile("rep ; stosl" \
- x \
- : "=&c" (d0), "=&D" (d1) \
- : "a" (eax), "0" (count/4), "1" ((long)s) \
- : "memory")
-
- {
- int d0, d1;
-#if __GNUC__ == 4 && __GNUC_MINOR__ == 0
- /* Workaround for broken gcc 4.0 */
- register unsigned long eax asm("%eax") = pattern;
-#else
- unsigned long eax = pattern;
-#endif
-
- switch (count % 4) {
- case 0:
- COMMON("");
- return s;
- case 1:
- COMMON("\n\tstosb");
- return s;
- case 2:
- COMMON("\n\tstosw");
- return s;
- default:
- COMMON("\n\tstosw\n\tstosb");
- return s;
- }
- }
-
-#undef COMMON
-}
-
-#define __constant_c_x_memset(s, c, count) \
- (__builtin_constant_p(count) \
- ? __constant_c_and_count_memset((s), (c), (count)) \
- : __constant_c_memset((s), (c), (count)))
-
#define __memset(s, c, count) \
(__builtin_constant_p(count) \
? __constant_count_memset((s), (c), (count)) \
@@ -321,15 +225,7 @@ void *__constant_c_and_count_memset(void *s, unsigned long pattern,
#define __HAVE_ARCH_MEMSET
extern void *memset(void *, int, size_t);
#ifndef CONFIG_FORTIFY_SOURCE
-#if (__GNUC__ >= 4)
#define memset(s, c, count) __builtin_memset(s, c, count)
-#else
-#define memset(s, c, count) \
- (__builtin_constant_p(c) \
- ? __constant_c_x_memset((s), (0x01010101UL * (unsigned char)(c)), \
- (count)) \
- : __memset((s), (c), (count)))
-#endif
#endif /* !CONFIG_FORTIFY_SOURCE */

#define __HAVE_ARCH_MEMSET16
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index 4e4194e21a09..75314c3dbe47 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -14,21 +14,6 @@
extern void *memcpy(void *to, const void *from, size_t len);
extern void *__memcpy(void *to, const void *from, size_t len);

-#ifndef CONFIG_FORTIFY_SOURCE
-#if (__GNUC__ == 4 && __GNUC_MINOR__ < 3) || __GNUC__ < 4
-#define memcpy(dst, src, len) \
-({ \
- size_t __len = (len); \
- void *__ret; \
- if (__builtin_constant_p(len) && __len >= 64) \
- __ret = __memcpy((dst), (src), __len); \
- else \
- __ret = __builtin_memcpy((dst), (src), __len); \
- __ret; \
-})
-#endif
-#endif /* !CONFIG_FORTIFY_SOURCE */
-
#define __HAVE_ARCH_MEMSET
void *memset(void *s, int c, size_t n);
void *__memset(void *s, int c, size_t n);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index cb28e98a0659..7e0ea4470f8e 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -312,10 +312,16 @@ static __init int setup_disable_smep(char *arg)
}
__setup("nosmep", setup_disable_smep);

+volatile unsigned long cr4_pin __ro_after_init;
+EXPORT_SYMBOL_GPL(cr4_pin);
+
static __always_inline void setup_smep(struct cpuinfo_x86 *c)
{
- if (cpu_has(c, X86_FEATURE_SMEP))
+ if (cpu_has(c, X86_FEATURE_SMEP)) {
+ if (!(cr4_pin & X86_CR4_SMEP))
+ cr4_pin |= X86_CR4_SMEP;
cr4_set_bits(X86_CR4_SMEP);
+ }
}

static __init int setup_disable_smap(char *arg)
@@ -334,6 +340,8 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)

if (cpu_has(c, X86_FEATURE_SMAP)) {
#ifdef CONFIG_X86_SMAP
+ if (!(cr4_pin & X86_CR4_SMAP))
+ cr4_pin |= X86_CR4_SMAP;
cr4_set_bits(X86_CR4_SMAP);
#else
cr4_clear_bits(X86_CR4_SMAP);
@@ -351,6 +359,8 @@ static __always_inline void setup_umip(struct cpuinfo_x86 *c)
if (!cpu_has(c, X86_FEATURE_UMIP))
goto out;

+ if (!(cr4_pin & X86_CR4_UMIP))
+ cr4_pin |= X86_CR4_UMIP;
cr4_set_bits(X86_CR4_UMIP);

pr_info_once("x86/cpu: User Mode Instruction Prevention (UMIP) activated\n");
diff --git a/drivers/misc/lkdtm/bugs.c b/drivers/misc/lkdtm/bugs.c
index 7eebbdfbcacd..6176384b4f85 100644
--- a/drivers/misc/lkdtm/bugs.c
+++ b/drivers/misc/lkdtm/bugs.c
@@ -255,3 +255,64 @@ void lkdtm_STACK_GUARD_PAGE_TRAILING(void)

pr_err("FAIL: accessed page after stack!\n");
}
+
+void lkdtm_UNSET_SMEP(void)
+{
+#ifdef CONFIG_X86_64
+#define MOV_CR4_DEPTH 64
+ void (*direct_write_cr4)(unsigned long val);
+ unsigned char *insn;
+ unsigned long cr4;
+ int i;
+
+ cr4 = native_read_cr4();
+
+ if ((cr4 & X86_CR4_SMEP) != X86_CR4_SMEP) {
+ pr_err("FAIL: SMEP not in use\n");
+ return;
+ }
+ cr4 &= ~(X86_CR4_SMEP);
+
+ pr_info("trying to clear SMEP normally\n");
+ native_write_cr4(cr4);
+ if (cr4 == native_read_cr4()) {
+ pr_err("FAIL: pinning SMEP failed!\n");
+ cr4 |= X86_CR4_SMEP;
+ pr_info("restoring SMEP\n");
+ native_write_cr4(cr4);
+ return;
+ }
+ pr_info("ok: SMEP did not get cleared\n");
+
+ /*
+ * To test the post-write pinning verification we need to call
+ * directly into the the middle of native_write_cr4() where the
+ * cr4 write happens, skipping the pinning. This searches for
+ * the cr4 writing instruction.
+ */
+ insn = (unsigned char *)native_write_cr4;
+ for (i = 0; i < MOV_CR4_DEPTH; i++) {
+ /* mov %rdi, %cr4 */
+ if (insn[i] == 0x0f && insn[i+1] == 0x22 && insn[i+2] == 0xe7)
+ break;
+ }
+ if (i >= MOV_CR4_DEPTH) {
+ pr_info("ok: cannot locate cr4 writing call gadget\n");
+ return;
+ }
+ direct_write_cr4 = (void *)(insn + i);
+
+ pr_info("trying to clear SMEP with call gadget\n");
+ direct_write_cr4(cr4);
+ if (native_read_cr4() & X86_CR4_SMEP) {
+ pr_info("ok: SMEP removal was reverted\n");
+ } else {
+ pr_err("FAIL: cleared SMEP not detected!\n");
+ cr4 |= X86_CR4_SMEP;
+ pr_info("restoring SMEP\n");
+ native_write_cr4(cr4);
+ }
+#else
+ pr_err("FAIL: this test is x86_64-only\n");
+#endif
+}
diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c
index 2837dc77478e..fd668776414b 100644
--- a/drivers/misc/lkdtm/core.c
+++ b/drivers/misc/lkdtm/core.c
@@ -132,6 +132,7 @@ static const struct crashtype crashtypes[] = {
CRASHTYPE(CORRUPT_LIST_ADD),
CRASHTYPE(CORRUPT_LIST_DEL),
CRASHTYPE(CORRUPT_USER_DS),
+ CRASHTYPE(UNSET_SMEP),
CRASHTYPE(CORRUPT_STACK),
CRASHTYPE(CORRUPT_STACK_STRONG),
CRASHTYPE(STACK_GUARD_PAGE_LEADING),
diff --git a/drivers/misc/lkdtm/lkdtm.h b/drivers/misc/lkdtm/lkdtm.h
index 3c6fd327e166..9c78d7e21c13 100644
--- a/drivers/misc/lkdtm/lkdtm.h
+++ b/drivers/misc/lkdtm/lkdtm.h
@@ -26,6 +26,7 @@ void lkdtm_CORRUPT_LIST_DEL(void);
void lkdtm_CORRUPT_USER_DS(void);
void lkdtm_STACK_GUARD_PAGE_LEADING(void);
void lkdtm_STACK_GUARD_PAGE_TRAILING(void);
+void lkdtm_UNSET_SMEP(void);

/* lkdtm_heap.c */
void lkdtm_OVERWRITE_ALLOCATION(void);