[tip:x86/mm] x86/fault: Decode page fault OOPSes better

From: tip-bot for Andy Lutomirski
Date: Thu Nov 22 2018 - 05:12:41 EST


Commit-ID: a1a371c468f7238b7826fde55786b02377faf8e2
Gitweb: https://git.kernel.org/tip/a1a371c468f7238b7826fde55786b02377faf8e2
Author: Andy Lutomirski <luto@xxxxxxxxxx>
AuthorDate: Wed, 21 Nov 2018 15:11:25 -0800
Committer: Ingo Molnar <mingo@xxxxxxxxxx>
CommitDate: Thu, 22 Nov 2018 09:24:28 +0100

x86/fault: Decode page fault OOPSes better

One of Linus' favorite hobbies seems to be looking at OOPSes and
decoding the error code in his head. This is not one of my favorite
hobbies :)

Teach the page fault OOPS hander to decode the error code. If it's
a !USER fault from user mode, print an explicit note to that effect
and print out the addresses of various tables that might cause such
an error.

With this patch applied, if I intentionally point the LDT at 0x0 and
run the x86 selftests, I get:

BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
HW error: normal kernel read fault
This was a system access from user code
IDT: 0xfffffe0000000000 (limit=0xfff) GDT: 0xfffffe0000001000 (limit=0x7f)
LDTR: 0x50 -- base=0x0 limit=0xfff7
TR: 0x40 -- base=0xfffffe0000003000 limit=0x206f
PGD 800000000456e067 P4D 800000000456e067 PUD 4623067 PMD 0
SMP PTI
CPU: 0 PID: 153 Comm: ldt_gdt_64 Not tainted 4.19.0+ #1317
Hardware name: ...
RIP: 0033:0x401454

Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxxxx>
Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
Cc: H. Peter Anvin <hpa@xxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Rik van Riel <riel@xxxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Yu-cheng Yu <yu-cheng.yu@xxxxxxxxx>
Link: http://lkml.kernel.org/r/11212acb25980cd1b3030875cd9502414fbb214d.1542841400.git.luto@xxxxxxxxxx
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
---
arch/x86/mm/fault.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 84 insertions(+)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index ca38bd0472f2..f5efbdba2b6d 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -27,6 +27,7 @@
#include <asm/vm86.h> /* struct vm86 */
#include <asm/mmu_context.h> /* vma_pkey() */
#include <asm/efi.h> /* efi_recover_from_page_fault()*/
+#include <asm/desc.h> /* store_idt(), ... */

#define CREATE_TRACE_POINTS
#include <asm/trace/exceptions.h>
@@ -571,10 +572,53 @@ static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
return 0;
}

+static void show_ldttss(const struct desc_ptr *gdt, const char *name, u16 index)
+{
+ u32 offset = (index >> 3) * sizeof(struct desc_struct);
+ unsigned long addr;
+ struct ldttss_desc desc;
+
+ if (index == 0) {
+ pr_alert("%s: NULL\n", name);
+ return;
+ }
+
+ if (offset + sizeof(struct ldttss_desc) >= gdt->size) {
+ pr_alert("%s: 0x%hx -- out of bounds\n", name, index);
+ return;
+ }
+
+ if (probe_kernel_read(&desc, (void *)(gdt->address + offset),
+ sizeof(struct ldttss_desc))) {
+ pr_alert("%s: 0x%hx -- GDT entry is not readable\n",
+ name, index);
+ return;
+ }
+
+ addr = desc.base0 | (desc.base1 << 16) | (desc.base2 << 24);
+#ifdef CONFIG_X86_64
+ addr |= ((u64)desc.base3 << 32);
+#endif
+ pr_alert("%s: 0x%hx -- base=0x%lx limit=0x%x\n",
+ name, index, addr, (desc.limit0 | (desc.limit1 << 16)));
+}
+
+static void errstr(unsigned long ec, char *buf, unsigned long mask,
+ const char *txt)
+{
+ if (ec & mask) {
+ if (buf[0])
+ strcat(buf, " ");
+ strcat(buf, txt);
+ }
+}
+
static void
show_fault_oops(struct pt_regs *regs, unsigned long error_code,
unsigned long address)
{
+ char errtxt[64];
+
if (!oops_may_print())
return;

@@ -602,6 +646,46 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
address < PAGE_SIZE ? "NULL pointer dereference" : "paging request",
(void *)address);

+ errtxt[0] = 0;
+ errstr(error_code, errtxt, X86_PF_PROT, "PROT");
+ errstr(error_code, errtxt, X86_PF_WRITE, "WRITE");
+ errstr(error_code, errtxt, X86_PF_USER, "USER");
+ errstr(error_code, errtxt, X86_PF_RSVD, "RSVD");
+ errstr(error_code, errtxt, X86_PF_INSTR, "INSTR");
+ errstr(error_code, errtxt, X86_PF_PK, "PK");
+ pr_alert("HW error: %s\n", error_code ? errtxt :
+ "normal kernel read fault");
+ if (!(error_code & X86_PF_USER) && user_mode(regs)) {
+ struct desc_ptr idt, gdt;
+ u16 ldtr, tr;
+
+ pr_alert("This was a system access from user code\n");
+
+ /*
+ * This can happen for quite a few reasons. The more obvious
+ * ones are faults accessing the GDT, or LDT. Perhaps
+ * surprisingly, if the CPU tries to deliver a benign or
+ * contributory exception from user code and gets a page fault
+ * during delivery, the page fault can be delivered as though
+ * it originated directly from user code. This could happen
+ * due to wrong permissions on the IDT, GDT, LDT, TSS, or
+ * kernel or IST stack.
+ */
+ store_idt(&idt);
+
+ /* Usable even on Xen PV -- it's just slow. */
+ native_store_gdt(&gdt);
+
+ pr_alert("IDT: 0x%lx (limit=0x%hx) GDT: 0x%lx (limit=0x%hx)\n",
+ idt.address, idt.size, gdt.address, gdt.size);
+
+ store_ldt(ldtr);
+ show_ldttss(&gdt, "LDTR", ldtr);
+
+ store_tr(tr);
+ show_ldttss(&gdt, "TR", tr);
+ }
+
dump_pagetable(address);
}