[PATCH v2] mm/damon/vaddr: Safely walk page table

From: SeongJae Park
Date: Tue Aug 31 2021 - 12:18:48 EST


From: SeongJae Park <sjpark@xxxxxxxxx>

Commit d7f647622761 ("mm/damon: implement primitives for the virtual
memory address spaces") of linux-mm[1] tries to find PTE or PMD for
arbitrary virtual address using 'follow_invalidate_pte()' without proper
locking[2]. This commit fixes the issue by using another page table
walk function for more general use case ('walk_page_range()') under
proper locking (holding mmap read lock).

[1] https://github.com/hnaz/linux-mm/commit/d7f647622761
[2] https://lore.kernel.org/linux-mm/3b094493-9c1e-6024-bfd5-7eca66399b7e@xxxxxxxxxx

Fixes: d7f647622761 ("mm/damon: implement primitives for the virtual memory address spaces")
Reported-by: David Hildenbrand <david@xxxxxxxxxx>
Signed-off-by: SeongJae Park <sjpark@xxxxxxxxx>
---
Changes from v1 (https://lore.kernel.org/linux-mm/20210827150400.6305-1-sj38.park@xxxxxxxxx/)
- Hold only mmap read lock (David Hildenbrand)
- Access the PTE/PMD from the walk_page_range() callbacks (David Hildenbrand)

mm/damon/vaddr.c | 136 +++++++++++++++++++++++++++++++++--------------
1 file changed, 97 insertions(+), 39 deletions(-)

diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
index 230db7413278..58c1fb2aafa9 100644
--- a/mm/damon/vaddr.c
+++ b/mm/damon/vaddr.c
@@ -8,10 +8,12 @@
#define pr_fmt(fmt) "damon-va: " fmt

#include <linux/damon.h>
+#include <linux/hugetlb.h>
#include <linux/mm.h>
#include <linux/mmu_notifier.h>
#include <linux/highmem.h>
#include <linux/page_idle.h>
+#include <linux/pagewalk.h>
#include <linux/random.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
@@ -446,22 +448,42 @@ static void damon_pmdp_mkold(pmd_t *pmd, struct mm_struct *mm,
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
}

-static void damon_va_mkold(struct mm_struct *mm, unsigned long addr)
+static int damon_mkold_pmd_entry(pmd_t *pmd, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
{
- pte_t *pte = NULL;
- pmd_t *pmd = NULL;
+ pte_t *pte;
spinlock_t *ptl;

- if (follow_invalidate_pte(mm, addr, NULL, &pte, &pmd, &ptl))
- return;
-
- if (pte) {
- damon_ptep_mkold(pte, mm, addr);
- pte_unmap_unlock(pte, ptl);
- } else {
- damon_pmdp_mkold(pmd, mm, addr);
+ if (pmd_huge(*pmd)) {
+ ptl = pmd_lock(walk->mm, pmd);
+ if (pmd_huge(*pmd)) {
+ damon_pmdp_mkold(pmd, walk->mm, addr);
+ spin_unlock(ptl);
+ return 0;
+ }
spin_unlock(ptl);
}
+
+ if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
+ return 0;
+ pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+ if (!pte_present(*pte))
+ goto out;
+ damon_ptep_mkold(pte, walk->mm, addr);
+out:
+ pte_unmap_unlock(pte, ptl);
+ return 0;
+}
+
+static struct mm_walk_ops damon_mkold_ops = {
+ .pmd_entry = damon_mkold_pmd_entry,
+};
+
+static void damon_va_mkold(struct mm_struct *mm, unsigned long addr)
+{
+ mmap_read_lock(mm);
+ walk_page_range(mm, addr, addr + 1, &damon_mkold_ops, NULL);
+ mmap_read_unlock(mm);
}

/*
@@ -492,43 +514,79 @@ void damon_va_prepare_access_checks(struct damon_ctx *ctx)
}
}

-static bool damon_va_young(struct mm_struct *mm, unsigned long addr,
- unsigned long *page_sz)
+struct damon_young_walk_private {
+ unsigned long *page_sz;
+ bool young;
+};
+
+static int damon_young_pmd_entry(pmd_t *pmd, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
{
- pte_t *pte = NULL;
- pmd_t *pmd = NULL;
+ pte_t *pte;
spinlock_t *ptl;
struct page *page;
- bool young = false;
-
- if (follow_invalidate_pte(mm, addr, NULL, &pte, &pmd, &ptl))
- return false;
-
- *page_sz = PAGE_SIZE;
- if (pte) {
- page = damon_get_page(pte_pfn(*pte));
- if (page && (pte_young(*pte) || !page_is_idle(page) ||
- mmu_notifier_test_young(mm, addr)))
- young = true;
- if (page)
- put_page(page);
- pte_unmap_unlock(pte, ptl);
- return young;
- }
+ struct damon_young_walk_private *priv = walk->private;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- page = damon_get_page(pmd_pfn(*pmd));
- if (page && (pmd_young(*pmd) || !page_is_idle(page) ||
- mmu_notifier_test_young(mm, addr)))
- young = true;
- if (page)
+ if (pmd_huge(*pmd)) {
+ ptl = pmd_lock(walk->mm, pmd);
+ if (!pmd_huge(*pmd)) {
+ spin_unlock(ptl);
+ goto regular_page;
+ }
+ page = damon_get_page(pmd_pfn(*pmd));
+ if (!page)
+ goto huge_out;
+ if (pmd_young(*pmd) || !page_is_idle(page) ||
+ mmu_notifier_test_young(walk->mm,
+ addr)) {
+ *priv->page_sz = ((1UL) << HPAGE_PMD_SHIFT);
+ priv->young = true;
+ }
put_page(page);
+huge_out:
+ spin_unlock(ptl);
+ return 0;
+ }

- spin_unlock(ptl);
- *page_sz = ((1UL) << HPAGE_PMD_SHIFT);
+regular_page:
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

- return young;
+ if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
+ return -EINVAL;
+ pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+ if (!pte_present(*pte))
+ goto out;
+ page = damon_get_page(pte_pfn(*pte));
+ if (!page)
+ goto out;
+ if (pte_young(*pte) || !page_is_idle(page) ||
+ mmu_notifier_test_young(walk->mm, addr)) {
+ *priv->page_sz = PAGE_SIZE;
+ priv->young = true;
+ }
+ put_page(page);
+out:
+ pte_unmap_unlock(pte, ptl);
+ return 0;
+}
+
+static struct mm_walk_ops damon_young_ops = {
+ .pmd_entry = damon_young_pmd_entry,
+};
+
+static bool damon_va_young(struct mm_struct *mm, unsigned long addr,
+ unsigned long *page_sz)
+{
+ struct damon_young_walk_private arg = {
+ .page_sz = page_sz,
+ .young = false,
+ };
+
+ mmap_read_lock(mm);
+ walk_page_range(mm, addr, addr + 1, &damon_young_ops, &arg);
+ mmap_read_unlock(mm);
+ return arg.young;
}

/*
--
2.17.1