Re: [RFC PATCH v2 2/3] mm: make pXd_addr_end() functions page-table entry aware

From: Christophe Leroy
Date: Wed Sep 09 2020 - 04:39:00 EST


On Tue, 2020-09-08 at 16:15 +0200, Alexander Gordeev wrote:
> On Tue, Sep 08, 2020 at 10:16:49AM +0200, Christophe Leroy wrote:
> > >Yes, and also two more sources :/
> > > arch/powerpc/mm/kasan/8xx.c
> > > arch/powerpc/mm/kasan/kasan_init_32.c
> > >
> > >But these two are not quite obvious wrt pgd_addr_end() used
> > >while traversing pmds. Could you please clarify a bit?
> > >
> > >
> > >diff --git a/arch/powerpc/mm/kasan/8xx.c b/arch/powerpc/mm/kasan/8xx.c
> > >index 2784224..89c5053 100644
> > >--- a/arch/powerpc/mm/kasan/8xx.c
> > >+++ b/arch/powerpc/mm/kasan/8xx.c
> > >@@ -15,8 +15,8 @@
> > > for (k_cur = k_start; k_cur != k_end; k_cur = k_next, pmd += 2, block += SZ_8M) {
> > > pte_basic_t *new;
> > >- k_next = pgd_addr_end(k_cur, k_end);
> > >- k_next = pgd_addr_end(k_next, k_end);
> > >+ k_next = pmd_addr_end(k_cur, k_end);
> > >+ k_next = pmd_addr_end(k_next, k_end);
> >
> > No, I don't think so.
> > On powerpc32 we have only two levels, so pgd and pmd are more or
> > less the same.
> > But pmd_addr_end() as defined in include/asm-generic/pgtable-nopmd.h
> > is a no-op, so I don't think it will work.
> >
> > It is likely that this function should iterate on pgd, then you get
> > pmd = pmd_offset(pud_offset(p4d_offset(pgd)));
>
> It looks like the code iterates over single pmd table while using
> pgd_addr_end() only to skip all the middle levels and bail out
> from the loop.
>
> I would be wary for switching from pmds to pgds, since we are
> trying to minimize impact (especially functional) and the
> rework does not seem that obvious.
>

I've just tested the following change, it works and should fix the
oddity:

diff --git a/arch/powerpc/mm/kasan/8xx.c b/arch/powerpc/mm/kasan/8xx.c
index 2784224054f8..8e53ddf57b84 100644
--- a/arch/powerpc/mm/kasan/8xx.c
+++ b/arch/powerpc/mm/kasan/8xx.c
@@ -9,11 +9,12 @@
static int __init
kasan_init_shadow_8M(unsigned long k_start, unsigned long k_end, void
*block)
{
- pmd_t *pmd = pmd_off_k(k_start);
+ pgd_t *pgd = pgd_offset_k(k_start);
unsigned long k_cur, k_next;

- for (k_cur = k_start; k_cur != k_end; k_cur = k_next, pmd += 2, block
+= SZ_8M) {
+ for (k_cur = k_start; k_cur != k_end; k_cur = k_next, pgd += 2, block
+= SZ_8M) {
pte_basic_t *new;
+ pmd_t *pmd = pmd_offset(pud_offset(p4d_offset(pgd, k_cur), k_cur),
k_cur);

k_next = pgd_addr_end(k_cur, k_end);
k_next = pgd_addr_end(k_next, k_end);
diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c
b/arch/powerpc/mm/kasan/kasan_init_32.c
index fb294046e00e..e5f524fa71a7 100644
--- a/arch/powerpc/mm/kasan/kasan_init_32.c
+++ b/arch/powerpc/mm/kasan/kasan_init_32.c
@@ -30,13 +30,12 @@ static void __init kasan_populate_pte(pte_t *ptep,
pgprot_t prot)

int __init kasan_init_shadow_page_tables(unsigned long k_start,
unsigned long k_end)
{
- pmd_t *pmd;
+ pgd_t *pgd = pgd_offset_k(k_start);
unsigned long k_cur, k_next;

- pmd = pmd_off_k(k_start);
-
- for (k_cur = k_start; k_cur != k_end; k_cur = k_next, pmd++) {
+ for (k_cur = k_start; k_cur != k_end; k_cur = k_next, pgd++) {
pte_t *new;
+ pmd_t *pmd = pmd_offset(pud_offset(p4d_offset(pgd, k_cur), k_cur),
k_cur);

k_next = pgd_addr_end(k_cur, k_end);
if ((void *)pmd_page_vaddr(*pmd) != kasan_early_shadow_pte)
@@ -189,16 +188,18 @@ void __init kasan_early_init(void)
unsigned long addr = KASAN_SHADOW_START;
unsigned long end = KASAN_SHADOW_END;
unsigned long next;
- pmd_t *pmd = pmd_off_k(addr);
+ pgd_t *pgd = pgd_offset_k(addr);

BUILD_BUG_ON(KASAN_SHADOW_START & ~PGDIR_MASK);

kasan_populate_pte(kasan_early_shadow_pte, PAGE_KERNEL);

do {
+ pmd_t *pmd = pmd_offset(pud_offset(p4d_offset(pgd, addr), addr),
addr);
+
next = pgd_addr_end(addr, end);
pmd_populate_kernel(&init_mm, pmd, kasan_early_shadow_pte);
- } while (pmd++, addr = next, addr != end);
+ } while (pgd++, addr = next, addr != end);

if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE))
kasan_early_hash_table();
---
Christophe