[tip:x86/mm] x86/mm/dump_pagetables: Speed up page tables dump for CONFIG_KASAN=y

From: tip-bot for Andrey Ryabinin
Date: Tue Jul 25 2017 - 09:55:02 EST


Commit-ID: 04b67022fb6d5b13025591f61a487a6ef7f4f05c
Gitweb: http://git.kernel.org/tip/04b67022fb6d5b13025591f61a487a6ef7f4f05c
Author: Andrey Ryabinin <aryabinin@xxxxxxxxxxxxx>
AuthorDate: Mon, 24 Jul 2017 18:25:58 +0300
Committer: Ingo Molnar <mingo@xxxxxxxxxx>
CommitDate: Tue, 25 Jul 2017 11:22:09 +0200

x86/mm/dump_pagetables: Speed up page tables dump for CONFIG_KASAN=y

KASAN fills kernel page tables with repeated values in order to map
several TBs of virtual memory to a single kasan_zero_page:

  kasan_zero_p4d ->
    kasan_zero_pud ->
      kasan_zero_pmd ->
        kasan_zero_pte ->
          kasan_zero_page
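
For reference, the replication is set up roughly like the loops below.
This is only a sketch of what KASAN's early init does, not code from
this patch; the real setup lives in arch/x86/mm/kasan_init_64.c, and
the flag macros (_KERNPG_TABLE, __PAGE_KERNEL) are chosen purely for
illustration:

	int i;

	/*
	 * Illustrative only: every slot of each zero table references the
	 * same lower-level zero table, so any shadow address eventually
	 * resolves to kasan_zero_page.
	 */
	for (i = 0; i < PTRS_PER_PUD; i++)
		kasan_zero_pud[i] = __pud(__pa(kasan_zero_pmd) | _KERNPG_TABLE);
	for (i = 0; i < PTRS_PER_PMD; i++)
		kasan_zero_pmd[i] = __pmd(__pa(kasan_zero_pte) | _KERNPG_TABLE);
	for (i = 0; i < PTRS_PER_PTE; i++)
		kasan_zero_pte[i] = __pte(__pa(kasan_zero_page) | __PAGE_KERNEL);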

Walking the whole KASAN shadow range takes a lot of time, especially
with 5-level page tables. Since we already know that all KASAN page
tables eventually point to kasan_zero_page, we can call note_page()
right away (via the new kasan_page_table() helper) and avoid walking
the lower levels of the page tables. This does not change the output
of the kernel_page_tables file, but it lets us avoid spending time in
the page table walkers:

Before:

$ time cat /sys/kernel/debug/kernel_page_tables > /dev/null

real 0m55.855s
user 0m0.000s
sys 0m55.840s

After:

$ time cat /sys/kernel/debug/kernel_page_tables > /dev/null

real 0m0.054s
user 0m0.000s
sys 0m0.054s
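
For completeness, the debugfs file used above is only present when the
page table dumper is built in. The numbers assume a config along these
lines (option names as of this kernel version; double-check them
against your tree):

	CONFIG_KASAN=y
	CONFIG_X86_PTDUMP=y	# provides /sys/kernel/debug/kernel_page_tables
	CONFIG_DEBUG_WX=y	# boot-time W+X check uses the same walker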

Signed-off-by: Andrey Ryabinin <aryabinin@xxxxxxxxxxxxx>
Acked-by: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
Cc: Alexander Potapenko <glider@xxxxxxxxxx>
Cc: Dmitry Vyukov <dvyukov@xxxxxxxxxx>
Cc: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Link: http://lkml.kernel.org/r/20170724152558.24689-1-aryabinin@xxxxxxxxxxxxx
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
---
arch/x86/mm/dump_pagetables.c | 64 +++++++++++++++++++++++++++----------------
1 file changed, 41 insertions(+), 23 deletions(-)

diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index b371ab6..5e3ac6f 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -13,12 +13,12 @@
  */
 
 #include <linux/debugfs.h>
+#include <linux/kasan.h>
 #include <linux/mm.h>
 #include <linux/init.h>
 #include <linux/sched.h>
 #include <linux/seq_file.h>
 
-#include <asm/kasan.h>
 #include <asm/pgtable.h>
 
 /*
@@ -302,23 +302,53 @@ static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
 		start++;
 	}
 }
+#ifdef CONFIG_KASAN
+
+/*
+ * This is an optimization for KASAN=y case. Since all kasan page tables
+ * eventually point to the kasan_zero_page we could call note_page()
+ * right away without walking through lower level page tables. This saves
+ * us dozens of seconds (minutes for 5-level config) while checking for
+ * W+X mapping or reading kernel_page_tables debugfs file.
+ */
+static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st,
+				void *pt)
+{
+	if (__pa(pt) == __pa(kasan_zero_pmd) ||
+#ifdef CONFIG_X86_5LEVEL
+	    __pa(pt) == __pa(kasan_zero_p4d) ||
+#endif
+	    __pa(pt) == __pa(kasan_zero_pud)) {
+		pgprotval_t prot = pte_flags(kasan_zero_pte[0]);
+		note_page(m, st, __pgprot(prot), 5);
+		return true;
+	}
+	return false;
+}
+#else
+static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st,
+				void *pt)
+{
+	return false;
+}
+#endif
 
 #if PTRS_PER_PMD > 1
 
 static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr, unsigned long P)
 {
 	int i;
-	pmd_t *start;
+	pmd_t *start, *pmd_start;
 	pgprotval_t prot;
 
-	start = (pmd_t *)pud_page_vaddr(addr);
+	pmd_start = start = (pmd_t *)pud_page_vaddr(addr);
 	for (i = 0; i < PTRS_PER_PMD; i++) {
 		st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT);
 		if (!pmd_none(*start)) {
 			if (pmd_large(*start) || !pmd_present(*start)) {
 				prot = pmd_flags(*start);
 				note_page(m, st, __pgprot(prot), 4);
-			} else {
+			} else if (!kasan_page_table(m, st, pmd_start)) {
 				walk_pte_level(m, st, *start,
					       P + i * PMD_LEVEL_MULT);
 			}
@@ -336,34 +366,22 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
 
 #if PTRS_PER_PUD > 1
 
-/*
- * This is an optimization for CONFIG_DEBUG_WX=y + CONFIG_KASAN=y
- * KASAN fills page tables with the same values. Since there is no
- * point in checking page table more than once we just skip repeated
- * entries. This saves us dozens of seconds during boot.
- */
-static bool pud_already_checked(pud_t *prev_pud, pud_t *pud, bool checkwx)
-{
-	return checkwx && prev_pud && (pud_val(*prev_pud) == pud_val(*pud));
-}
-
 static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr, unsigned long P)
 {
 	int i;
-	pud_t *start;
+	pud_t *start, *pud_start;
 	pgprotval_t prot;
 	pud_t *prev_pud = NULL;
 
-	start = (pud_t *)p4d_page_vaddr(addr);
+	pud_start = start = (pud_t *)p4d_page_vaddr(addr);
 
 	for (i = 0; i < PTRS_PER_PUD; i++) {
 		st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);
-		if (!pud_none(*start) &&
-		    !pud_already_checked(prev_pud, start, st->check_wx)) {
+		if (!pud_none(*start)) {
 			if (pud_large(*start) || !pud_present(*start)) {
 				prot = pud_flags(*start);
 				note_page(m, st, __pgprot(prot), 3);
-			} else {
+			} else if (!kasan_page_table(m, st, pud_start)) {
 				walk_pmd_level(m, st, *start,
					       P + i * PUD_LEVEL_MULT);
 			}
@@ -386,10 +404,10 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr,
 static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr, unsigned long P)
 {
 	int i;
-	p4d_t *start;
+	p4d_t *start, *p4d_start;
 	pgprotval_t prot;
 
-	start = (p4d_t *)pgd_page_vaddr(addr);
+	p4d_start = start = (p4d_t *)pgd_page_vaddr(addr);
 
 	for (i = 0; i < PTRS_PER_P4D; i++) {
 		st->current_address = normalize_addr(P + i * P4D_LEVEL_MULT);
@@ -397,7 +415,7 @@ static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
 			if (p4d_large(*start) || !p4d_present(*start)) {
 				prot = p4d_flags(*start);
 				note_page(m, st, __pgprot(prot), 2);
-			} else {
+			} else if (!kasan_page_table(m, st, p4d_start)) {
 				walk_pud_level(m, st, *start,
					       P + i * P4D_LEVEL_MULT);
 			}