Re: pipe/page fault oddness.

From: Linus Torvalds
Date: Wed Oct 01 2014 - 16:21:01 EST


On Wed, Oct 1, 2014 at 9:18 AM, Linus Torvalds
<torvalds@xxxxxxxxxxxxxxxxxxxx> wrote:
>
> So I'd really suggest we do exactly that. Get rid of "pte_numa()"
> entirely, get rid of "_PAGE_[BIT_]NUMA" entirely, and instead add a
> "pte_protnone()" helper to check for the "protnone" case (which on x86
> is testing the _PAGE_PROTNONE bit, and on most other architectures is
> just testing that the page has no access rights).
>
> Then we throw away "pte_mknuma()" and "pte_mknonnuma()" entirely,
> because they are brainless sh*t, and we just use
>
> ptent = ptep_modify_prot_start(mm, addr, pte);
> ptent = pte_modify(ptent, newprot);
> ptep_modify_prot_commit(mm, addr, pte, ptent);
>
> reliably instead (where for the mknuma case "newprot" is PROT_NONE,
> and for mknonnuma() it is vma->vm_page_prot). Yes, that means that you
> have to pass in the vma to those functions, but that just makes sense
> anyway.
>
> And if that means that we lose the numa flag on mprotect etc, nobody sane cares.
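
Spelled out, that recipe is nothing more than this kind of thing (the
function name and the "make_numa" flag below are made up purely for
illustration - it's not a helper in the patch, the real callers just
open-code those three lines with the right "newprot"):

	/* Illustration only: not part of the patch below */
	static void example_change_pte_prot(struct vm_area_struct *vma,
					    unsigned long addr, pte_t *pte,
					    bool make_numa)
	{
		struct mm_struct *mm = vma->vm_mm;
		pgprot_t newprot = make_numa ? PAGE_NONE : vma->vm_page_prot;
		pte_t ptent;

		/* caller holds the pte lock */
		ptent = ptep_modify_prot_start(mm, addr, pte);
		ptent = pte_modify(ptent, newprot);
		ptep_modify_prot_commit(mm, addr, pte, ptent);
	}

and the only NUMA-specific knowledge left is which "newprot" to pick.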

So here is a *COMPLETELY UNTESTED* and probably seriously buggy first
version of such a patch. It doesn't do the powerpc conversion, so
somebody would need to check that eventually, but aside from that
obvious issue, can people fix this up? Or comment on why it doesn't
work.

Now, I like this not just because it gets rid of the horrible
_PAGE_NUMA special cases, but because it really seems to simplify
things in general. Lookie here:

13 files changed, 74 insertions(+), 268 deletions(-)

That's really mainly just the removal of the odd and broken NUMA
pte/pmd helper functions from <asm-generic/pgtable.h> that aren't
needed any more, because the normal "change protections" functions just
DTRT automatically. A few other cases actually got simpler too, though,
so it's not *just* the removal of those _PAGE_NUMA-specific helpers.
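
(The mm/mempolicy.c hunk is probably the clearest example of what "DTRT
automatically" means here: marking a range for NUMA hinting faults is
now just an ordinary protection change to PAGE_NONE,

	nr_updated = change_protection(vma, addr, end, PAGE_NONE, 0, 1);

with no NUMA-specific page table helper involved at all.)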

One thing this does *not* remove is the special pte locking rule in
the "change_*_range()" functions: they still take that broken
"prot_numa" argument. HOWEVER, it isn't actually used for any page
table modifications any more; the only reason it still exists is the
hacky locking issue (see lock_pte_protection(), and the comment about
races with the transhuge accesses).
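
For reference, the gist of that locking rule is roughly the following -
a simplified sketch with approximated names and details, see the real
lock_pte_protection() in mm/mprotect.c for what actually runs:

	static pte_t *lock_pte_protection_sketch(struct vm_area_struct *vma,
			pmd_t *pmd, unsigned long addr, int prot_numa,
			spinlock_t **ptl)
	{
		spinlock_t *pmdl;
		pte_t *pte;

		/* Plain mprotect() runs with mmap_sem held for write */
		if (!prot_numa)
			return pte_offset_map_lock(vma->vm_mm, pmd, addr, ptl);

		/*
		 * The NUMA hinting update runs with mmap_sem held for
		 * read, so serialise against transhuge pmd changes
		 * before touching the pte page.
		 */
		pmdl = pmd_lock(vma->vm_mm, pmd);
		if (pmd_trans_huge(*pmd) || pmd_none(*pmd)) {
			spin_unlock(pmdl);
			return NULL;
		}

		pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, ptl);
		spin_unlock(pmdl);
		return pte;
	}

so the "prot_numa" argument only affects how we take the pte lock, not
what ends up in the page tables.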

Now, I'll be honest: this patch *might* just work, but I expect it to
have some stupid problem. It compiles. I haven't even dared boot it,
much less try any NUMA benchmarks that wouldn't show anything sane on
my machine anyway.

So I'm really sending this patch out in the hope that it will get
comments, fixups, and possibly even testing by people who actually know
the NUMA balancing code. Rik? Anybody?

Linus

arch/x86/include/asm/pgtable.h | 31 +++++--
arch/x86/include/asm/pgtable_types.h | 27 +-----
arch/x86/mm/gup.c | 4 +-
include/asm-generic/pgtable.h | 159 ++---------------------------------
include/linux/huge_mm.h | 3 +-
init/Kconfig | 11 ---
mm/gup.c | 4 +-
mm/huge_memory.c | 42 +++------
mm/memory.c | 16 ++--
mm/mempolicy.c | 2 +-
mm/migrate.c | 1 -
mm/mprotect.c | 40 +++------
mm/pgtable-generic.c | 2 -
13 files changed, 74 insertions(+), 268 deletions(-)

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index aa97a070f09f..0d4ec3a62fed 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -299,7 +299,7 @@ static inline pmd_t pmd_mkwrite(pmd_t pmd)

static inline pmd_t pmd_mknotpresent(pmd_t pmd)
{
- return pmd_clear_flags(pmd, _PAGE_PRESENT);
+ return pmd_clear_flags(pmd, _PAGE_PRESENT | _PAGE_PROTNONE);
}

#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
@@ -457,8 +457,7 @@ static inline int pte_same(pte_t a, pte_t b)

static inline int pte_present(pte_t a)
{
- return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE |
- _PAGE_NUMA);
+ return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE);
}

#define pte_present_nonuma pte_present_nonuma
@@ -473,7 +472,7 @@ static inline bool pte_accessible(struct mm_struct *mm, pte_t a)
if (pte_flags(a) & _PAGE_PRESENT)
return true;

- if ((pte_flags(a) & (_PAGE_PROTNONE | _PAGE_NUMA)) &&
+ if ((pte_flags(a) & _PAGE_PROTNONE) &&
mm_tlb_flush_pending(mm))
return true;

@@ -493,10 +492,26 @@ static inline int pmd_present(pmd_t pmd)
* the _PAGE_PSE flag will remain set at all times while the
* _PAGE_PRESENT bit is clear).
*/
- return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE |
- _PAGE_NUMA);
+ return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE);
}

+#ifdef CONFIG_NUMA_BALANCING
+/*
+ * Technically these work even when not doing NUMA
+ * balancing, but we don't care and have simpler
+ * "always false" versions for that case
+ */
+static inline int pte_protnone(pte_t pte)
+{
+ return pte_flags(pte) & _PAGE_PROTNONE;
+}
+
+static inline int pmd_protnone(pmd_t pmd)
+{
+ return pmd_flags(pmd) & _PAGE_PROTNONE;
+}
+#endif
+
static inline int pmd_none(pmd_t pmd)
{
/* Only check low word on 32-bit platforms, since it might be
@@ -554,8 +569,8 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address)
static inline int pmd_bad(pmd_t pmd)
{
#ifdef CONFIG_NUMA_BALANCING
- /* pmd_numa check */
- if ((pmd_flags(pmd) & (_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA)
+ /* pmd_numa check. Really? */
+ if ((pmd_flags(pmd) & (_PAGE_PROTNONE|_PAGE_PRESENT)) == _PAGE_PROTNONE)
return 0;
#endif
return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE;
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index f216963760e5..5438e96b2915 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -28,14 +28,6 @@
#define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */
#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */

-/*
- * Swap offsets on configurations that allow automatic NUMA balancing use the
- * bits after _PAGE_BIT_GLOBAL. To uniquely distinguish NUMA hinting PTEs from
- * swap entries, we use the first bit after _PAGE_BIT_GLOBAL and shrink the
- * maximum possible swap space from 16TB to 8TB.
- */
-#define _PAGE_BIT_NUMA (_PAGE_BIT_GLOBAL+1)
-
/* If _PAGE_BIT_PRESENT is clear, we use these: */
/* - if the user mapped it with PROT_NONE; pte_present gives true */
#define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL
@@ -79,21 +71,6 @@
#endif

/*
- * _PAGE_NUMA distinguishes between a numa hinting minor fault and a page
- * that is not present. The hinting fault gathers numa placement statistics
- * (see pte_numa()). The bit is always zero when the PTE is not present.
- *
- * The bit picked must be always zero when the pmd is present and not
- * present, so that we don't lose information when we set it while
- * atomically clearing the present bit.
- */
-#ifdef CONFIG_NUMA_BALANCING
-#define _PAGE_NUMA (_AT(pteval_t, 1) << _PAGE_BIT_NUMA)
-#else
-#define _PAGE_NUMA (_AT(pteval_t, 0))
-#endif
-
-/*
* Tracking soft dirty bit when a page goes to a swap is tricky.
* We need a bit which can be stored in pte _and_ not conflict
* with swap entry format. On x86 bits 6 and 7 are *not* involved
@@ -126,8 +103,8 @@
/* Set of bits not changed in pte_modify */
#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \
_PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \
- _PAGE_SOFT_DIRTY | _PAGE_NUMA)
-#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE | _PAGE_NUMA)
+ _PAGE_SOFT_DIRTY)
+#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE)

#define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT)
#define _PAGE_CACHE_WB (0)
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 207d9aef662d..f32e12c44b23 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -84,7 +84,7 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
struct page *page;

/* Similar to the PMD case, NUMA hinting must take slow path */
- if (pte_numa(pte)) {
+ if (pte_protnone(pte)) {
pte_unmap(ptep);
return 0;
}
@@ -178,7 +178,7 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
* slowpath for accounting purposes and so that they
* can be serialised against THP migration.
*/
- if (pmd_numa(pmd))
+ if (pmd_protnone(pmd))
return 0;
if (!gup_huge_pmd(pmd, addr, next, write, pages, nr))
return 0;
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 53b2acc38213..a7c08d9ddeda 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -659,165 +659,24 @@ static inline int pmd_trans_unstable(pmd_t *pmd)
#endif
}

-#ifdef CONFIG_NUMA_BALANCING
-#ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE
+#ifndef CONFIG_NUMA_BALANCING
/*
- * _PAGE_NUMA works identical to _PAGE_PROTNONE (it's actually the
- * same bit too). It's set only when _PAGE_PRESET is not set and it's
- * never set if _PAGE_PRESENT is set.
- *
- * pte/pmd_present() returns true if pte/pmd_numa returns true. Page
- * fault triggers on those regions if pte/pmd_numa returns true
- * (because _PAGE_PRESENT is not set).
- */
-#ifndef pte_numa
-static inline int pte_numa(pte_t pte)
-{
- return (pte_flags(pte) &
- (_PAGE_NUMA|_PAGE_PROTNONE|_PAGE_PRESENT)) == _PAGE_NUMA;
-}
-#endif
-
-#ifndef pmd_numa
-static inline int pmd_numa(pmd_t pmd)
-{
- return (pmd_flags(pmd) &
- (_PAGE_NUMA|_PAGE_PROTNONE|_PAGE_PRESENT)) == _PAGE_NUMA;
-}
-#endif
-
-/*
- * pte/pmd_mknuma sets the _PAGE_ACCESSED bitflag automatically
- * because they're called by the NUMA hinting minor page fault. If we
- * wouldn't set the _PAGE_ACCESSED bitflag here, the TLB miss handler
- * would be forced to set it later while filling the TLB after we
- * return to userland. That would trigger a second write to memory
- * that we optimize away by setting _PAGE_ACCESSED here.
+ * Technically a pte can be PROTNONE even when not
+ * doing NUMA balancing, but the only case where the
+ * kernel actually _cares_ is for the numa balancing
+ * case, so by default we just implement the simpler
+ * "always no" helper function.
*/
-#ifndef pte_mknonnuma
-static inline pte_t pte_mknonnuma(pte_t pte)
-{
- pteval_t val = pte_val(pte);
-
- val &= ~_PAGE_NUMA;
- val |= (_PAGE_PRESENT|_PAGE_ACCESSED);
- return __pte(val);
-}
-#endif
-
-#ifndef pmd_mknonnuma
-static inline pmd_t pmd_mknonnuma(pmd_t pmd)
-{
- pmdval_t val = pmd_val(pmd);
-
- val &= ~_PAGE_NUMA;
- val |= (_PAGE_PRESENT|_PAGE_ACCESSED);
-
- return __pmd(val);
-}
-#endif
-
-#ifndef pte_mknuma
-static inline pte_t pte_mknuma(pte_t pte)
-{
- pteval_t val = pte_val(pte);
-
- val &= ~_PAGE_PRESENT;
- val |= _PAGE_NUMA;
-
- return __pte(val);
-}
-#endif
-
-#ifndef ptep_set_numa
-static inline void ptep_set_numa(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep)
-{
- pte_t ptent = *ptep;
-
- ptent = pte_mknuma(ptent);
- set_pte_at(mm, addr, ptep, ptent);
- return;
-}
-#endif
-
-#ifndef pmd_mknuma
-static inline pmd_t pmd_mknuma(pmd_t pmd)
-{
- pmdval_t val = pmd_val(pmd);
-
- val &= ~_PAGE_PRESENT;
- val |= _PAGE_NUMA;
-
- return __pmd(val);
-}
-#endif
-
-#ifndef pmdp_set_numa
-static inline void pmdp_set_numa(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp)
-{
- pmd_t pmd = *pmdp;
-
- pmd = pmd_mknuma(pmd);
- set_pmd_at(mm, addr, pmdp, pmd);
- return;
-}
-#endif
-#else
-extern int pte_numa(pte_t pte);
-extern int pmd_numa(pmd_t pmd);
-extern pte_t pte_mknonnuma(pte_t pte);
-extern pmd_t pmd_mknonnuma(pmd_t pmd);
-extern pte_t pte_mknuma(pte_t pte);
-extern pmd_t pmd_mknuma(pmd_t pmd);
-extern void ptep_set_numa(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
-extern void pmdp_set_numa(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp);
-#endif /* CONFIG_ARCH_USES_NUMA_PROT_NONE */
-#else
-static inline int pmd_numa(pmd_t pmd)
+static inline int pte_protnone(pte_t pte)
{
return 0;
}

-static inline int pte_numa(pte_t pte)
+static inline int pmd_protnone(pmd_t pmd)
{
return 0;
}
-
-static inline pte_t pte_mknonnuma(pte_t pte)
-{
- return pte;
-}
-
-static inline pmd_t pmd_mknonnuma(pmd_t pmd)
-{
- return pmd;
-}
-
-static inline pte_t pte_mknuma(pte_t pte)
-{
- return pte;
-}
-
-static inline void ptep_set_numa(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep)
-{
- return;
-}
-
-
-static inline pmd_t pmd_mknuma(pmd_t pmd)
-{
- return pmd;
-}
-
-static inline void pmdp_set_numa(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp)
-{
- return ;
-}
-#endif /* CONFIG_NUMA_BALANCING */
+#endif

#endif /* CONFIG_MMU */

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 63579cb8d3dc..2a2ac91eafde 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -31,8 +31,7 @@ extern int move_huge_pmd(struct vm_area_struct *vma,
unsigned long new_addr, unsigned long old_end,
pmd_t *old_pmd, pmd_t *new_pmd);
extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
- unsigned long addr, pgprot_t newprot,
- int prot_numa);
+ unsigned long addr, pgprot_t newprot);

enum transparent_hugepage_flag {
TRANSPARENT_HUGEPAGE_FLAG,
diff --git a/init/Kconfig b/init/Kconfig
index e84c6423a2e5..f5c6bb56617a 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -887,17 +887,6 @@ config ARCH_SUPPORTS_INT128
config ARCH_WANT_NUMA_VARIABLE_LOCALITY
bool

-#
-# For architectures that are willing to define _PAGE_NUMA as _PAGE_PROTNONE
-config ARCH_WANTS_PROT_NUMA_PROT_NONE
- bool
-
-config ARCH_USES_NUMA_PROT_NONE
- bool
- default y
- depends on ARCH_WANTS_PROT_NUMA_PROT_NONE
- depends on NUMA_BALANCING
-
config NUMA_BALANCING_DEFAULT_ENABLED
bool "Automatically enable NUMA aware memory/task placement"
default y
diff --git a/mm/gup.c b/mm/gup.c
index 91d044b1600d..7a7419da2da9 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -60,7 +60,7 @@ retry:
migration_entry_wait(mm, pmd, address);
goto retry;
}
- if ((flags & FOLL_NUMA) && pte_numa(pte))
+ if ((flags & FOLL_NUMA) && pte_protnone(pte))
goto no_page;
if ((flags & FOLL_WRITE) && !pte_write(pte)) {
pte_unmap_unlock(ptep, ptl);
@@ -189,7 +189,7 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
}
return page;
}
- if ((flags & FOLL_NUMA) && pmd_numa(*pmd))
+ if ((flags & FOLL_NUMA) && pmd_protnone(*pmd))
return no_page_table(vma, flags);
if (pmd_trans_huge(*pmd)) {
if (flags & FOLL_SPLIT) {
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index d9a21d06b862..14de54af6c38 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1223,7 +1223,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
return ERR_PTR(-EFAULT);

/* Full NUMA hinting faults to serialise migration in fault paths */
- if ((flags & FOLL_NUMA) && pmd_numa(*pmd))
+ if ((flags & FOLL_NUMA) && pmd_protnone(*pmd))
goto out;

page = pmd_page(*pmd);
@@ -1366,9 +1366,8 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
goto out;
clear_pmdnuma:
BUG_ON(!PageLocked(page));
- pmd = pmd_mknonnuma(pmd);
+ pmd = pmd_modify(pmd, vma->vm_page_prot);
set_pmd_at(mm, haddr, pmdp, pmd);
- VM_BUG_ON(pmd_numa(*pmdp));
update_mmu_cache_pmd(vma, addr, pmdp);
unlock_page(page);
out_unlock:
@@ -1502,7 +1501,7 @@ out:
* - HPAGE_PMD_NR is protections changed and TLB flush necessary
*/
int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
- unsigned long addr, pgprot_t newprot, int prot_numa)
+ unsigned long addr, pgprot_t newprot)
{
struct mm_struct *mm = vma->vm_mm;
spinlock_t *ptl;
@@ -1511,29 +1510,12 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
pmd_t entry;
ret = 1;
- if (!prot_numa) {
- entry = pmdp_get_and_clear(mm, addr, pmd);
- if (pmd_numa(entry))
- entry = pmd_mknonnuma(entry);
- entry = pmd_modify(entry, newprot);
- ret = HPAGE_PMD_NR;
- set_pmd_at(mm, addr, pmd, entry);
- BUG_ON(pmd_write(entry));
- } else {
- struct page *page = pmd_page(*pmd);

- /*
- * Do not trap faults against the zero page. The
- * read-only data is likely to be read-cached on the
- * local CPU cache and it is less useful to know about
- * local vs remote hits on the zero page.
- */
- if (!is_huge_zero_page(page) &&
- !pmd_numa(*pmd)) {
- pmdp_set_numa(mm, addr, pmd);
- ret = HPAGE_PMD_NR;
- }
- }
+ entry = pmdp_get_and_clear(mm, addr, pmd);
+ entry = pmd_modify(entry, newprot);
+ ret = HPAGE_PMD_NR;
+ set_pmd_at(mm, addr, pmd, entry);
+ BUG_ON(pmd_write(entry));
spin_unlock(ptl);
}

@@ -1794,15 +1776,17 @@ static int __split_huge_page_map(struct page *page,
haddr = address;
for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
pte_t *pte, entry;
+ pgprot_t newprot = vma->vm_page_prot;
+
BUG_ON(PageCompound(page+i));
- entry = mk_pte(page + i, vma->vm_page_prot);
+ if (pmd_protnone(*pmd))
+ newprot = PAGE_NONE;
+ entry = mk_pte(page + i, newprot);
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
if (!pmd_write(*pmd))
entry = pte_wrprotect(entry);
if (!pmd_young(*pmd))
entry = pte_mkold(entry);
- if (pmd_numa(*pmd))
- entry = pte_mknuma(entry);
pte = pte_offset_map(&_pmd, haddr);
BUG_ON(!pte_none(*pte));
set_pte_at(mm, haddr, pte, entry);
diff --git a/mm/memory.c b/mm/memory.c
index e229970e4223..62f1b7b82aef 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3118,9 +3118,9 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
* validation through pte_unmap_same(). It's of NUMA type but
* the pfn may be screwed if the read is non atomic.
*
- * ptep_modify_prot_start is not called as this is clearing
- * the _PAGE_NUMA bit and it is not really expected that there
- * would be concurrent hardware modifications to the PTE.
+ * We can safely just do a "set_pte_at()", because the old
+ * page table entry is not accessible, so there would be no
+ * concurrent hardware modifications to the PTE.
*/
ptl = pte_lockptr(mm, pmd);
spin_lock(ptl);
@@ -3129,7 +3129,11 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
goto out;
}

- pte = pte_mknonnuma(pte);
+ /* Make it present again */
+ /* Should we make it writable? We don't have the fault flags */
+ pte = pte_modify(pte, vma->vm_page_prot);
+ pte = pte_mkyoung(pte);
+
set_pte_at(mm, addr, ptep, pte);
update_mmu_cache(vma, addr, ptep);

@@ -3218,7 +3222,7 @@ static int handle_pte_fault(struct mm_struct *mm,
pte, pmd, flags, entry);
}

- if (pte_numa(entry))
+ if (pte_protnone(entry))
return do_numa_page(mm, vma, address, entry, pte, pmd);

ptl = pte_lockptr(mm, pmd);
@@ -3296,7 +3300,7 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
if (pmd_trans_splitting(orig_pmd))
return 0;

- if (pmd_numa(orig_pmd))
+ if (pmd_protnone(orig_pmd))
return do_huge_pmd_numa_page(mm, vma, address,
orig_pmd, pmd);

diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 8f5330d74f47..5e22f1ff4e33 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -635,7 +635,7 @@ unsigned long change_prot_numa(struct vm_area_struct *vma,
{
int nr_updated;

- nr_updated = change_protection(vma, addr, end, vma->vm_page_prot, 0, 1);
+ nr_updated = change_protection(vma, addr, end, PAGE_NONE, 0, 1);
if (nr_updated)
count_vm_numa_events(NUMA_PTE_UPDATES, nr_updated);

diff --git a/mm/migrate.c b/mm/migrate.c
index f78ec9bd454d..2600f33416b1 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1901,7 +1901,6 @@ out_fail:
out_dropref:
ptl = pmd_lock(mm, pmd);
if (pmd_same(*pmd, entry)) {
- entry = pmd_mknonnuma(entry);
set_pmd_at(mm, mmun_start, pmd, entry);
update_mmu_cache_pmd(vma, address, &entry);
}
diff --git a/mm/mprotect.c b/mm/mprotect.c
index c43d557941f8..aef6fe6aba36 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -82,34 +82,17 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
oldpte = *pte;
if (pte_present(oldpte)) {
pte_t ptent;
- bool updated = false;

- if (!prot_numa) {
- ptent = ptep_modify_prot_start(mm, addr, pte);
- if (pte_numa(ptent))
- ptent = pte_mknonnuma(ptent);
- ptent = pte_modify(ptent, newprot);
- /*
- * Avoid taking write faults for pages we
- * know to be dirty.
- */
- if (dirty_accountable && pte_dirty(ptent))
- ptent = pte_mkwrite(ptent);
- ptep_modify_prot_commit(mm, addr, pte, ptent);
- updated = true;
- } else {
- struct page *page;
-
- page = vm_normal_page(vma, addr, oldpte);
- if (page && !PageKsm(page)) {
- if (!pte_numa(oldpte)) {
- ptep_set_numa(mm, addr, pte);
- updated = true;
- }
- }
- }
- if (updated)
- pages++;
+ ptent = ptep_modify_prot_start(mm, addr, pte);
+ ptent = pte_modify(ptent, newprot);
+ /*
+ * Avoid taking write faults for pages we
+ * know to be dirty.
+ */
+ if (dirty_accountable && pte_dirty(ptent))
+ ptent = pte_mkwrite(ptent);
+ ptep_modify_prot_commit(mm, addr, pte, ptent);
+ pages++;
} else if (IS_ENABLED(CONFIG_MIGRATION) && !pte_file(oldpte)) {
swp_entry_t entry = pte_to_swp_entry(oldpte);

@@ -164,8 +147,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
if (next - addr != HPAGE_PMD_SIZE)
split_huge_page_pmd(vma, addr, pmd);
else {
- int nr_ptes = change_huge_pmd(vma, pmd, addr,
- newprot, prot_numa);
+ int nr_ptes = change_huge_pmd(vma, pmd, addr, newprot);

if (nr_ptes) {
if (nr_ptes == HPAGE_PMD_NR) {
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index dfb79e028ecb..c25f94b33811 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -193,8 +193,6 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp)
{
pmd_t entry = *pmdp;
- if (pmd_numa(entry))
- entry = pmd_mknonnuma(entry);
set_pmd_at(vma->vm_mm, address, pmdp, pmd_mknotpresent(entry));
flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
}