[RFC 02/20] mm/mprotect: use mmu_gather

From: Nadav Amit
Date: Sat Jan 30 2021 - 19:17:24 EST


From: Nadav Amit <namit@xxxxxxxxxx>

change_pXX_range() currently does not use mmu_gather, but instead
implements its own deferred TLB flushes scheme. This both complicates
the code, as developers need to be aware of different invalidation
schemes, and prevents opportunities to avoid TLB flushes or perform
them in finer granularity.
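
For reference, the scheme being replaced is the open-coded deferred-flush
bookkeeping in change_protection_range(). An abridged sketch, reconstructed
from the removed lines of the last mm/mprotect.c hunk below (the "_old"
suffix is illustrative, not a kernel symbol; the page-table walk is elided):

#include <linux/mm.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>

static unsigned long change_protection_range_old(struct vm_area_struct *vma,
		unsigned long start, unsigned long end)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long pages = 0;

	flush_cache_range(vma, start, end);
	/* Publish that a TLB flush is pending for this mm: */
	inc_tlb_flush_pending(mm);

	/* ... walk the page tables, counting changed entries in 'pages' ... */

	/* Only flush the TLB if we actually modified any entries: */
	if (pages)
		flush_tlb_range(vma, start, end);
	dec_tlb_flush_pending(mm);

	return pages;
}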

Use mmu_gather in change_pXX_range(). As the pages are not released,
only record the flushed range using tlb_flush_pXX_range().
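
The resulting flow follows the generic mmu_gather pattern instead; a minimal
sketch matching the added lines in the hunks below (again, the "_new" suffix
is illustrative only, and the walk is elided):

#include <linux/mm.h>
#include <asm/tlb.h>

static unsigned long change_protection_range_new(struct vm_area_struct *vma,
		unsigned long start, unsigned long end)
{
	struct mmu_gather tlb;
	unsigned long pages = 0;

	tlb_gather_mmu(&tlb, vma->vm_mm);
	tlb_start_vma(&tlb, vma);	/* handles flush_cache_range() */

	/*
	 * During the walk, each committed change records its range so the
	 * deferred flush covers exactly what was modified:
	 *
	 *	tlb_flush_pte_range(&tlb, addr, PAGE_SIZE);
	 *	tlb_flush_pmd_range(&tlb, addr, HPAGE_PMD_SIZE);
	 */

	tlb_end_vma(&tlb, vma);		/* flush the recorded range */
	tlb_finish_mmu(&tlb);

	return pages;
}

Note that mmu_gather tracks whether any range was recorded and skips the
flush otherwise, so the old "only flush if we modified something"
optimization is effectively retained.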

Signed-off-by: Nadav Amit <namit@xxxxxxxxxx>
Cc: Andrea Arcangeli <aarcange@xxxxxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Andy Lutomirski <luto@xxxxxxxxxx>
Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Will Deacon <will@xxxxxxxxxx>
Cc: Yu Zhao <yuzhao@xxxxxxxxxx>
Cc: Nick Piggin <npiggin@xxxxxxxxx>
Cc: x86@xxxxxxxxxx
---
 include/linux/huge_mm.h |  3 ++-
 mm/huge_memory.c        |  4 +++-
 mm/mprotect.c           | 51 ++++++++++++++++++++---------------------
 3 files changed, 30 insertions(+), 28 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 6a19f35f836b..6eff7f59a778 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -37,7 +37,8 @@ int zap_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma, pud_t *pud,
bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd);
int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr,
- pgprot_t newprot, unsigned long cp_flags);
+ pgprot_t newprot, unsigned long cp_flags,
+ struct mmu_gather *tlb);
vm_fault_t vmf_insert_pfn_pmd_prot(struct vm_fault *vmf, pfn_t pfn,
pgprot_t pgprot, bool write);

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9237976abe72..c345b8b06183 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1797,7 +1797,8 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
* - HPAGE_PMD_NR if protections changed and TLB flush necessary
*/
int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
- unsigned long addr, pgprot_t newprot, unsigned long cp_flags)
+ unsigned long addr, pgprot_t newprot, unsigned long cp_flags,
+ struct mmu_gather *tlb)
{
struct mm_struct *mm = vma->vm_mm;
spinlock_t *ptl;
@@ -1885,6 +1886,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
entry = pmd_clear_uffd_wp(entry);
}
ret = HPAGE_PMD_NR;
+ tlb_flush_pmd_range(tlb, addr, HPAGE_PMD_SIZE);
set_pmd_at(mm, addr, pmd, entry);
BUG_ON(vma_is_anonymous(vma) && !preserve_write && pmd_write(entry));
unlock:
diff --git a/mm/mprotect.c b/mm/mprotect.c
index ab709023e9aa..632d5a677d3f 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -32,12 +32,13 @@
#include <asm/cacheflush.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
+#include <asm/tlb.h>

#include "internal.h"

-static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
- unsigned long addr, unsigned long end, pgprot_t newprot,
- unsigned long cp_flags)
+static unsigned long change_pte_range(struct mmu_gather *tlb,
+ struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr,
+ unsigned long end, pgprot_t newprot, unsigned long cp_flags)
{
pte_t *pte, oldpte;
spinlock_t *ptl;
@@ -138,6 +139,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
ptent = pte_mkwrite(ptent);
}
ptep_modify_prot_commit(vma, addr, pte, oldpte, ptent);
+ tlb_flush_pte_range(tlb, addr, PAGE_SIZE);
pages++;
} else if (is_swap_pte(oldpte)) {
swp_entry_t entry = pte_to_swp_entry(oldpte);
@@ -209,9 +211,9 @@ static inline int pmd_none_or_clear_bad_unless_trans_huge(pmd_t *pmd)
return 0;
}

-static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
- pud_t *pud, unsigned long addr, unsigned long end,
- pgprot_t newprot, unsigned long cp_flags)
+static inline unsigned long change_pmd_range(struct mmu_gather *tlb,
+ struct vm_area_struct *vma, pud_t *pud, unsigned long addr,
+ unsigned long end, pgprot_t newprot, unsigned long cp_flags)
{
pmd_t *pmd;
unsigned long next;
@@ -252,7 +254,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
__split_huge_pmd(vma, pmd, addr, false, NULL);
} else {
int nr_ptes = change_huge_pmd(vma, pmd, addr,
- newprot, cp_flags);
+ newprot, cp_flags, tlb);

if (nr_ptes) {
if (nr_ptes == HPAGE_PMD_NR) {
@@ -266,8 +268,8 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
}
/* fall through, the trans huge pmd just split */
}
- this_pages = change_pte_range(vma, pmd, addr, next, newprot,
- cp_flags);
+ this_pages = change_pte_range(tlb, vma, pmd, addr, next,
+ newprot, cp_flags);
pages += this_pages;
next:
cond_resched();
@@ -281,9 +283,9 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
return pages;
}

-static inline unsigned long change_pud_range(struct vm_area_struct *vma,
- p4d_t *p4d, unsigned long addr, unsigned long end,
- pgprot_t newprot, unsigned long cp_flags)
+static inline unsigned long change_pud_range(struct mmu_gather *tlb,
+ struct vm_area_struct *vma, p4d_t *p4d, unsigned long addr,
+ unsigned long end, pgprot_t newprot, unsigned long cp_flags)
{
pud_t *pud;
unsigned long next;
@@ -294,16 +296,16 @@ static inline unsigned long change_pud_range(struct vm_area_struct *vma,
next = pud_addr_end(addr, end);
if (pud_none_or_clear_bad(pud))
continue;
- pages += change_pmd_range(vma, pud, addr, next, newprot,
+ pages += change_pmd_range(tlb, vma, pud, addr, next, newprot,
cp_flags);
} while (pud++, addr = next, addr != end);

return pages;
}

-static inline unsigned long change_p4d_range(struct vm_area_struct *vma,
- pgd_t *pgd, unsigned long addr, unsigned long end,
- pgprot_t newprot, unsigned long cp_flags)
+static inline unsigned long change_p4d_range(struct mmu_gather *tlb,
+ struct vm_area_struct *vma, pgd_t *pgd, unsigned long addr,
+ unsigned long end, pgprot_t newprot, unsigned long cp_flags)
{
p4d_t *p4d;
unsigned long next;
@@ -314,7 +316,7 @@ static inline unsigned long change_p4d_range(struct vm_area_struct *vma,
next = p4d_addr_end(addr, end);
if (p4d_none_or_clear_bad(p4d))
continue;
- pages += change_pud_range(vma, p4d, addr, next, newprot,
+ pages += change_pud_range(tlb, vma, p4d, addr, next, newprot,
cp_flags);
} while (p4d++, addr = next, addr != end);

@@ -328,25 +330,22 @@ static unsigned long change_protection_range(struct vm_area_struct *vma,
struct mm_struct *mm = vma->vm_mm;
pgd_t *pgd;
unsigned long next;
- unsigned long start = addr;
unsigned long pages = 0;
+ struct mmu_gather tlb;

BUG_ON(addr >= end);
pgd = pgd_offset(mm, addr);
- flush_cache_range(vma, addr, end);
- inc_tlb_flush_pending(mm);
+ tlb_gather_mmu(&tlb, mm);
+ tlb_start_vma(&tlb, vma);
do {
next = pgd_addr_end(addr, end);
if (pgd_none_or_clear_bad(pgd))
continue;
- pages += change_p4d_range(vma, pgd, addr, next, newprot,
+ pages += change_p4d_range(&tlb, vma, pgd, addr, next, newprot,
cp_flags);
} while (pgd++, addr = next, addr != end);
-
- /* Only flush the TLB if we actually modified any entries: */
- if (pages)
- flush_tlb_range(vma, start, end);
- dec_tlb_flush_pending(mm);
+ tlb_end_vma(&tlb, vma);
+ tlb_finish_mmu(&tlb);

return pages;
}
--
2.25.1