[PATCH 1/2] x86/mm: add spurious fault check for user space addresses

From: Luming Yu
Date: Tue Nov 27 2018 - 06:16:18 EST


We saw a suspected spurious fault that failed the VMA access check
but passed the spurious page-table check, for a user-space access to
a (rw, exec) address. This patch tries to catch that case, which may
indicate either a stale TLB (in our case a software bug was found) or
an otherwise harmful spurious fault.
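
To make the intent concrete, below is a small stand-alone model of the
new access_error() fall-through (user-space C, not kernel code;
vma_allows() and page_tables_allow() are hypothetical stand-ins for the
real VMA flags tests and spurious_page_table_check(); the X86_PF_* bit
values match the kernel's definitions):

/* Stand-alone model of the new access_error() fall-through; not kernel code. */
#include <stdbool.h>
#include <stdio.h>

#define X86_PF_PROT	0x1	/* fault was a protection violation */
#define X86_PF_WRITE	0x2	/* fault was a write access */
#define X86_PF_INSTR	0x10	/* fault was an instruction fetch */

/* Hypothetical stand-ins for the VMA flags tests and spurious_page_table_check(). */
static bool vma_allows(unsigned long error_code)        { (void)error_code; return false; }
static bool page_tables_allow(unsigned long error_code) { (void)error_code; return true;  }

/* Returns 1 for a real access error, 0 if the fault should just be retried. */
static int access_error_model(unsigned long error_code)
{
	if (vma_allows(error_code))
		return 0;
	/*
	 * The VMA check failed: before signalling, ask whether the page
	 * tables would actually have permitted the access.  If so, the
	 * fault may be spurious (stale TLB) -- warn once and retry.
	 */
	if (page_tables_allow(error_code)) {
		fprintf(stderr, "spurious fault? Failed VMA check, passed page table check!\n");
		return 0;
	}
	return 1;
}

int main(void)
{
	/* A write protection fault that the page tables would allow: retried (0). */
	printf("%d\n", access_error_model(X86_PF_PROT | X86_PF_WRITE));
	return 0;
}

The real change, below, keeps the existing VMA tests and only routes
the former "return 1" paths through a new maybe_spurious label.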

Signed-off-by: Luming Yu <luming.yu@xxxxxxxxx>
Signed-off-by: Yongkai Wu <yongkaiwu@xxxxxxxxxxx>
---
arch/x86/mm/fault.c | 95 ++++++++++++++++++++++++++++++-----------------------
1 file changed, 54 insertions(+), 41 deletions(-)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 71d4b9d4d43f..5e2a49010d87 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1004,29 +1004,7 @@ static int spurious_kernel_fault_check(unsigned long error_code, pte_t *pte)
return 1;
}

-/*
- * Handle a spurious fault caused by a stale TLB entry.
- *
- * This allows us to lazily refresh the TLB when increasing the
- * permissions of a kernel page (RO -> RW or NX -> X). Doing it
- * eagerly is very expensive since that implies doing a full
- * cross-processor TLB flush, even if no stale TLB entries exist
- * on other processors.
- *
- * Spurious faults may only occur if the TLB contains an entry with
- * fewer permission than the page table entry. Non-present (P = 0)
- * and reserved bit (R = 1) faults are never spurious.
- *
- * There are no security implications to leaving a stale TLB when
- * increasing the permissions on a page.
- *
- * Returns non-zero if a spurious fault was handled, zero otherwise.
- *
- * See Intel Developer's Manual Vol 3 Section 4.10.4.3, bullet 3
- * (Optional Invalidation).
- */
-static noinline int
-spurious_kernel_fault(unsigned long error_code, unsigned long address)
+static int spurious_page_table_check(unsigned long error_code, unsigned long address)
{
pgd_t *pgd;
p4d_t *p4d;
@@ -1035,19 +1013,6 @@ spurious_kernel_fault(unsigned long error_code, unsigned long address)
pte_t *pte;
int ret;

- /*
- * Only writes to RO or instruction fetches from NX may cause
- * spurious faults.
- *
- * These could be from user or supervisor accesses but the TLB
- * is only lazily flushed after a kernel mapping protection
- * change, so user accesses are not expected to cause spurious
- * faults.
- */
- if (error_code != (X86_PF_WRITE | X86_PF_PROT) &&
- error_code != (X86_PF_INSTR | X86_PF_PROT))
- return 0;
-
pgd = init_mm.pgd + pgd_index(address);
if (!pgd_present(*pgd))
return 0;
@@ -1090,12 +1055,52 @@ spurious_kernel_fault(unsigned long error_code, unsigned long address)

return ret;
}
+
+/*
+ * Handle a spurious fault caused by a stale TLB entry.
+ *
+ * This allows us to lazily refresh the TLB when increasing the
+ * permissions of a kernel page (RO -> RW or NX -> X). Doing it
+ * eagerly is very expensive since that implies doing a full
+ * cross-processor TLB flush, even if no stale TLB entries exist
+ * on other processors.
+ *
+ * Spurious faults may only occur if the TLB contains an entry with
+ * fewer permission than the page table entry. Non-present (P = 0)
+ * and reserved bit (R = 1) faults are never spurious.
+ *
+ * There are no security implications to leaving a stale TLB when
+ * increasing the permissions on a page.
+ *
+ * Returns non-zero if a spurious fault was handled, zero otherwise.
+ *
+ * See Intel Developer's Manual Vol 3 Section 4.10.4.3, bullet 3
+ * (Optional Invalidation).
+ */
+static noinline int
+spurious_kernel_fault(unsigned long error_code, unsigned long address)
+{
+ /*
+ * Only writes to RO or instruction fetches from NX may cause
+ * spurious faults.
+ *
+ * These could be from user or supervisor accesses but the TLB
+ * is only lazily flushed after a kernel mapping protection
+ * change, so user accesses are not expected to cause spurious
+ * faults.
+ */
+ if (error_code != (X86_PF_WRITE | X86_PF_PROT) &&
+ error_code != (X86_PF_INSTR | X86_PF_PROT))
+ return 0;
+
+ return spurious_page_table_check(error_code, address);
+}
NOKPROBE_SYMBOL(spurious_kernel_fault);

int show_unhandled_signals = 1;

static inline int
-access_error(unsigned long error_code, struct vm_area_struct *vma)
+access_error(unsigned long error_code, unsigned long address, struct vm_area_struct *vma)
{
/* This is only called for the current mm, so: */
bool foreign = false;
@@ -1120,19 +1125,27 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
if (error_code & X86_PF_WRITE) {
/* write, present and write, not present: */
if (unlikely(!(vma->vm_flags & VM_WRITE)))
- return 1;
+ goto maybe_spurious;
return 0;
}

/* read, present: */
if (unlikely(error_code & X86_PF_PROT))
- return 1;
+ goto maybe_spurious;

/* read, not present: */
if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
- return 1;
+ goto maybe_spurious;

return 0;
+
+maybe_spurious:
+ if (spurious_page_table_check(error_code, address)) {
+ WARN_ONCE(1, "spurious fault? Failed VMA check, passed page table check!\n");
+ return 0;
+ }
+
+ return 1;
}

static int fault_in_kernel_space(unsigned long address)
@@ -1402,7 +1415,7 @@ void do_user_addr_fault(struct pt_regs *regs,
* we can handle it..
*/
good_area:
- if (unlikely(access_error(sw_error_code, vma))) {
+ if (unlikely(access_error(sw_error_code, address, vma))) {
bad_area_access_error(regs, sw_error_code, address, vma);
return;
}
--
2.14.4