[RFC V1 PATCH mm-hotfixes 3/3] x86/mm: convert {pgd,p4d}_populate{,_init} to _kernel variants

From: Harry Yoo
Date: Wed Jul 09 2025 - 09:19:05 EST


Introduce {pgd,p4d}_populate_kernel_safe() and convert
{pgd,p4d}_populate{,_init}() to {pgd,p4d}_populate_kernel{,_init}().

With the conversion, it is no longer possible to forget to synchronize
the top-level page tables: the _kernel variants perform the
synchronization themselves.
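
For example, in __kernel_physical_mapping_init() the call site changes
from

	pgd_populate_init(&init_mm, pgd, p4d, init);

to

	pgd_populate_kernel_init(vaddr, pgd, p4d, init);

and the _kernel variants invoke arch_sync_kernel_pagetables()
internally (the non-_safe variants come from the previous patch in
this series), so a newly populated top-level entry is propagated to
all page tables as part of the populate operation itself, rather
than by a separate sync_global_pgds() call afterwards.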

With all {pgd,p4d}_populate{,_init}() calls converted to
{pgd,p4d}_populate_kernel{,_init}(), sync_global_pgds() has no
remaining users and can be dropped. Remove it.
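
Note that the two _safe helpers are intentionally asymmetric with
respect to pgtable_l5_enabled(). With 4-level paging the p4d is
folded into the pgd, so it is the p4d update that must be propagated,
while pgd_populate_kernel_safe() bails out early, mirroring the
existing pgd_populate{,_safe}() helpers:

	/* p4d_populate_kernel_safe(): sync only when p4d folds into pgd */
	if (!pgtable_l5_enabled())
		arch_sync_kernel_pagetables(addr);

	/* pgd_populate_kernel_safe(): nothing to populate with 4-level paging */
	if (!pgtable_l5_enabled())
		return;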

Cc: <stable@xxxxxxxxxxxxxxx>
Suggested-by: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
Signed-off-by: Harry Yoo <harry.yoo@xxxxxxxxxx>
---
arch/x86/include/asm/pgalloc.h | 19 +++++
arch/x86/mm/init_64.c | 129 ++++++---------------------------
arch/x86/mm/kasan_init_64.c | 8 +-
3 files changed, 46 insertions(+), 110 deletions(-)

diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index d66f2db54b16..98439b9ca293 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -132,6 +132,15 @@ static inline void p4d_populate_safe(struct mm_struct *mm, p4d_t *p4d, pud_t *pud)
 	set_p4d_safe(p4d, __p4d(_PAGE_TABLE | __pa(pud)));
 }

+static inline void p4d_populate_kernel_safe(unsigned long addr,
+					    p4d_t *p4d, pud_t *pud)
+{
+	paravirt_alloc_pud(&init_mm, __pa(pud) >> PAGE_SHIFT);
+	set_p4d_safe(p4d, __p4d(_PAGE_TABLE | __pa(pud)));
+	if (!pgtable_l5_enabled())
+		arch_sync_kernel_pagetables(addr);
+}
+
 extern void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud);

 static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
@@ -167,6 +176,16 @@ static inline void pgd_populate_safe(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d)
 	set_pgd_safe(pgd, __pgd(_PAGE_TABLE | __pa(p4d)));
 }

+static inline void pgd_populate_kernel_safe(unsigned long addr,
+					    pgd_t *pgd, p4d_t *p4d)
+{
+	if (!pgtable_l5_enabled())
+		return;
+	paravirt_alloc_p4d(&init_mm, __pa(p4d) >> PAGE_SHIFT);
+	set_pgd_safe(pgd, __pgd(_PAGE_TABLE | __pa(p4d)));
+	arch_sync_kernel_pagetables(addr);
+}
+
 extern void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d);

 static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index cbddbef434d5..00608ab36936 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -75,6 +75,19 @@ DEFINE_POPULATE(pgd_populate, pgd, p4d, init)
 DEFINE_POPULATE(pud_populate, pud, pmd, init)
 DEFINE_POPULATE(pmd_populate_kernel, pmd, pte, init)

+#define DEFINE_POPULATE_KERNEL(fname, type1, type2, init)	\
+static inline void fname##_init(unsigned long addr,		\
+		type1##_t *arg1, type2##_t *arg2, bool init)	\
+{								\
+	if (init)						\
+		fname##_safe(addr, arg1, arg2);			\
+	else							\
+		fname(addr, arg1, arg2);			\
+}
+
+DEFINE_POPULATE_KERNEL(pgd_populate_kernel, pgd, p4d, init)
+DEFINE_POPULATE_KERNEL(p4d_populate_kernel, p4d, pud, init)
+
 #define DEFINE_ENTRY(type1, type2, init)			\
 static inline void set_##type1##_init(type1##_t *arg1,		\
 		type2##_t arg2, bool init)			\
@@ -130,99 +143,6 @@ static int __init nonx32_setup(char *str)
 }
 __setup("noexec32=", nonx32_setup);

-static void sync_global_pgds_l5(unsigned long start, unsigned long end)
-{
-	unsigned long addr;
-
-	for (addr = start; addr <= end; addr = ALIGN(addr + 1, PGDIR_SIZE)) {
-		const pgd_t *pgd_ref = pgd_offset_k(addr);
-		struct page *page;
-
-		/* Check for overflow */
-		if (addr < start)
-			break;
-
-		if (pgd_none(*pgd_ref))
-			continue;
-
-		spin_lock(&pgd_lock);
-		list_for_each_entry(page, &pgd_list, lru) {
-			pgd_t *pgd;
-			spinlock_t *pgt_lock;
-
-			pgd = (pgd_t *)page_address(page) + pgd_index(addr);
-			/* the pgt_lock only for Xen */
-			pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
-			spin_lock(pgt_lock);
-
-			if (!pgd_none(*pgd_ref) && !pgd_none(*pgd))
-				BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
-
-			if (pgd_none(*pgd))
-				set_pgd(pgd, *pgd_ref);
-
-			spin_unlock(pgt_lock);
-		}
-		spin_unlock(&pgd_lock);
-	}
-}
-
-static void sync_global_pgds_l4(unsigned long start, unsigned long end)
-{
-	unsigned long addr;
-
-	for (addr = start; addr <= end; addr = ALIGN(addr + 1, PGDIR_SIZE)) {
-		pgd_t *pgd_ref = pgd_offset_k(addr);
-		const p4d_t *p4d_ref;
-		struct page *page;
-
-		/*
-		 * With folded p4d, pgd_none() is always false, we need to
-		 * handle synchronization on p4d level.
-		 */
-		MAYBE_BUILD_BUG_ON(pgd_none(*pgd_ref));
-		p4d_ref = p4d_offset(pgd_ref, addr);
-
-		if (p4d_none(*p4d_ref))
-			continue;
-
-		spin_lock(&pgd_lock);
-		list_for_each_entry(page, &pgd_list, lru) {
-			pgd_t *pgd;
-			p4d_t *p4d;
-			spinlock_t *pgt_lock;
-
-			pgd = (pgd_t *)page_address(page) + pgd_index(addr);
-			p4d = p4d_offset(pgd, addr);
-			/* the pgt_lock only for Xen */
-			pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
-			spin_lock(pgt_lock);
-
-			if (!p4d_none(*p4d_ref) && !p4d_none(*p4d))
-				BUG_ON(p4d_pgtable(*p4d)
-				       != p4d_pgtable(*p4d_ref));
-
-			if (p4d_none(*p4d))
-				set_p4d(p4d, *p4d_ref);
-
-			spin_unlock(pgt_lock);
-		}
-		spin_unlock(&pgd_lock);
-	}
-}
-
-/*
- * When memory was added make sure all the processes MM have
- * suitable PGD entries in the local PGD level page.
- */
-static void sync_global_pgds(unsigned long start, unsigned long end)
-{
-	if (pgtable_l5_enabled())
-		sync_global_pgds_l5(start, end);
-	else
-		sync_global_pgds_l4(start, end);
-}
-
 static void sync_kernel_pagetables_l4(unsigned long addr)
 {
 	pgd_t *pgd_ref = pgd_offset_k(addr);
@@ -295,6 +215,10 @@ static void sync_kernel_pagetables_l5(unsigned long addr)
 	spin_unlock(&pgd_lock);
 }

+/*
+ * When memory is added, make sure all processes' MMs have suitable
+ * PGD entries in their local PGD-level pages.
+ */
 void arch_sync_kernel_pagetables(unsigned long addr)
 {
 	if (pgtable_l5_enabled())
@@ -330,7 +254,7 @@ static p4d_t *fill_p4d(pgd_t *pgd, unsigned long vaddr)
 {
 	if (pgd_none(*pgd)) {
 		p4d_t *p4d = (p4d_t *)spp_getpage();
-		pgd_populate(&init_mm, pgd, p4d);
+		pgd_populate_kernel(vaddr, pgd, p4d);
 		if (p4d != p4d_offset(pgd, 0))
 			printk(KERN_ERR "PAGETABLE BUG #00! %p <-> %p\n",
 			       p4d, p4d_offset(pgd, 0));
@@ -342,7 +266,7 @@ static pud_t *fill_pud(p4d_t *p4d, unsigned long vaddr)
 {
 	if (p4d_none(*p4d)) {
 		pud_t *pud = (pud_t *)spp_getpage();
-		p4d_populate(&init_mm, p4d, pud);
+		p4d_populate_kernel(vaddr, p4d, pud);
 		if (pud != pud_offset(p4d, 0))
 			printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n",
 			       pud, pud_offset(p4d, 0));
@@ -795,7 +719,7 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
 					   page_size_mask, prot, init);

 		spin_lock(&init_mm.page_table_lock);
-		p4d_populate_init(&init_mm, p4d, pud, init);
+		p4d_populate_kernel_init(vaddr, p4d, pud, init);
 		spin_unlock(&init_mm.page_table_lock);
 	}

@@ -808,7 +732,6 @@ __kernel_physical_mapping_init(unsigned long paddr_start,
 			       unsigned long page_size_mask,
 			       pgprot_t prot, bool init)
 {
-	bool pgd_changed = false;
 	unsigned long vaddr, vaddr_start, vaddr_end, vaddr_next, paddr_last;

 	paddr_last = paddr_end;
@@ -837,18 +760,14 @@ __kernel_physical_mapping_init(unsigned long paddr_start,

 		spin_lock(&init_mm.page_table_lock);
 		if (pgtable_l5_enabled())
-			pgd_populate_init(&init_mm, pgd, p4d, init);
+			pgd_populate_kernel_init(vaddr, pgd, p4d, init);
 		else
-			p4d_populate_init(&init_mm, p4d_offset(pgd, vaddr),
-					  (pud_t *) p4d, init);
+			p4d_populate_kernel_init(vaddr, p4d_offset(pgd, vaddr),
+						 (pud_t *) p4d, init);

 		spin_unlock(&init_mm.page_table_lock);
-		pgd_changed = true;
 	}

-	if (pgd_changed)
-		sync_global_pgds(vaddr_start, vaddr_end - 1);
-
 	return paddr_last;
 }

@@ -1642,8 +1561,6 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 		err = -ENOMEM;
 	} else
 		err = vmemmap_populate_basepages(start, end, node, NULL);
-	if (!err)
-		sync_global_pgds(start, end - 1);
 	return err;
 }

diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 0539efd0d216..e825952d25b2 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -108,7 +108,7 @@ static void __init kasan_populate_p4d(p4d_t *p4d, unsigned long addr,
 	if (p4d_none(*p4d)) {
 		void *p = early_alloc(PAGE_SIZE, nid, true);

-		p4d_populate(&init_mm, p4d, p);
+		p4d_populate_kernel(addr, p4d, p);
 	}

 	pud = pud_offset(p4d, addr);
@@ -128,7 +128,7 @@ static void __init kasan_populate_pgd(pgd_t *pgd, unsigned long addr,

 	if (pgd_none(*pgd)) {
 		p = early_alloc(PAGE_SIZE, nid, true);
-		pgd_populate(&init_mm, pgd, p);
+		pgd_populate_kernel(addr, pgd, p);
 	}

 	p4d = p4d_offset(pgd, addr);
@@ -255,7 +255,7 @@ static void __init kasan_shallow_populate_p4ds(pgd_t *pgd,

 		if (p4d_none(*p4d)) {
 			p = early_alloc(PAGE_SIZE, NUMA_NO_NODE, true);
-			p4d_populate(&init_mm, p4d, p);
+			p4d_populate_kernel(addr, p4d, p);
 		}
 	} while (p4d++, addr = next, addr != end);
 }
@@ -273,7 +273,7 @@ static void __init kasan_shallow_populate_pgds(void *start, void *end)

 		if (pgd_none(*pgd)) {
 			p = early_alloc(PAGE_SIZE, NUMA_NO_NODE, true);
-			pgd_populate(&init_mm, pgd, p);
+			pgd_populate_kernel(addr, pgd, p);
 		}

 		/*
--
2.43.0