Re: [PATCH v4 16/20] powerpc/mm: Extend pte_fragment functionality to nohash/32

From: Christophe LEROY
Date: Tue Sep 25 2018 - 12:48:47 EST




Le 19/09/2018 à 05:03, Aneesh Kumar K.V a écrit :
On 9/18/18 10:27 PM, Christophe Leroy wrote:
In order to allow the 8xx to handle pte_fragments, this patch
extends the use of pte_fragments to nohash/32 platforms.

Signed-off-by: Christophe Leroy <christophe.leroy@xxxxxx>
---
 arch/powerpc/include/asm/mmu-40x.h | 1 +
 arch/powerpc/include/asm/mmu-44x.h | 1 +
 arch/powerpc/include/asm/mmu-8xx.h | 1 +
 arch/powerpc/include/asm/mmu-book3e.h | 1 +
 arch/powerpc/include/asm/mmu_context.h | 2 +-
 arch/powerpc/include/asm/nohash/32/pgalloc.h | 43 +++++++++++-----------------
 arch/powerpc/include/asm/nohash/32/pgtable.h | 7 +++--
 arch/powerpc/include/asm/page.h | 6 +---
 arch/powerpc/include/asm/pgtable.h | 8 ++++++
 arch/powerpc/mm/Makefile | 3 ++
 arch/powerpc/mm/mmu_context_nohash.c | 1 +
 arch/powerpc/mm/pgtable-frag.c | 6 ++++
 arch/powerpc/mm/pgtable_32.c | 8 ++++--
 13 files changed, 51 insertions(+), 37 deletions(-)

diff --git a/arch/powerpc/include/asm/mmu-40x.h b/arch/powerpc/include/asm/mmu-40x.h
index 74f4edb5916e..7c77ceed71d6 100644
--- a/arch/powerpc/include/asm/mmu-40x.h
+++ b/arch/powerpc/include/asm/mmu-40x.h
@@ -58,6 +58,7 @@ typedef struct {
 	unsigned int	id;
 	unsigned int	active;
 	unsigned long	vdso_base;
+	void *pte_frag;
 } mm_context_t;

 #endif /* !__ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/mmu-44x.h b/arch/powerpc/include/asm/mmu-44x.h
index 295b3dbb2698..3d72e889ae7b 100644
--- a/arch/powerpc/include/asm/mmu-44x.h
+++ b/arch/powerpc/include/asm/mmu-44x.h
@@ -109,6 +109,7 @@ typedef struct {
 	unsigned int	id;
 	unsigned int	active;
 	unsigned long	vdso_base;
+	void *pte_frag;
 } mm_context_t;

 #endif /* !__ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h
index fa05aa566ece..750cef6f65e3 100644
--- a/arch/powerpc/include/asm/mmu-8xx.h
+++ b/arch/powerpc/include/asm/mmu-8xx.h
@@ -179,6 +179,7 @@ typedef struct {
 	unsigned int id;
 	unsigned int active;
 	unsigned long vdso_base;
+	void *pte_frag;
 #ifdef CONFIG_PPC_MM_SLICES
 	u16 user_psize;		/* page size index */
 	unsigned char low_slices_psize[SLICE_ARRAY_SIZE];
diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h
index e20072972e35..8e8aad5172ab 100644
--- a/arch/powerpc/include/asm/mmu-book3e.h
+++ b/arch/powerpc/include/asm/mmu-book3e.h
@@ -230,6 +230,7 @@ typedef struct {
 	unsigned int	id;
 	unsigned int	active;
 	unsigned long	vdso_base;
+	void *pte_frag;
 } mm_context_t;

 /* Page size definitions, common between 32 and 64-bit
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index b2f89b621b15..7f2c37a3f99d 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -222,7 +222,7 @@ static inline int arch_dup_mmap(struct mm_struct *oldmm,
 	return 0;
 }

-#ifndef CONFIG_PPC_BOOK3S_64
+#if defined(CONFIG_PPC_BOOK3E_64) || defined(CONFIG_PPC_BOOK3S_32)
 static inline void arch_exit_mmap(struct mm_struct *mm)
 {
 }
diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h
index f3fec9052f31..e69423ad8e2e 100644
--- a/arch/powerpc/include/asm/nohash/32/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
@@ -27,6 +27,9 @@ extern void __bad_pte(pmd_t *pmd);
 extern struct kmem_cache *pgtable_cache[];
 #define PGT_CACHE(shift) pgtable_cache[shift]

+pte_t *pte_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel);
+void pte_fragment_free(unsigned long *table, int kernel);
+
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
@@ -58,11 +61,10 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp,
 static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
 				pgtable_t pte_page)
 {
-	*pmdp = __pmd((page_to_pfn(pte_page) << PAGE_SHIFT) | _PMD_USER |
-		      _PMD_PRESENT);
+	*pmdp = __pmd(__pa(pte_page) | _PMD_USER | _PMD_PRESENT);
 }
 
-#define pmd_pgtable(pmd) pmd_page(pmd)
+#define pmd_pgtable(pmd) ((pgtable_t)pmd_page_vaddr(pmd))
 #else

 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp,
@@ -74,49 +76,38 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp,
 static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
 				pgtable_t pte_page)
 {
-	*pmdp = __pmd((unsigned long)lowmem_page_address(pte_page) | _PMD_PRESENT);
+	*pmdp = __pmd((unsigned long)pte_page | _PMD_PRESENT);
 }
 
-#define pmd_pgtable(pmd) pmd_page(pmd)
+#define pmd_pgtable(pmd) ((pgtable_t)pmd_page_vaddr(pmd))
 #endif

-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
+					  unsigned long address)
 {
-	return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+	return (pte_t *)pte_fragment_alloc(mm, address, 1);
 }
 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
+				      unsigned long address)
 {
-	struct page *ptepage;
-
-	gfp_t flags = GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT;
-
-	ptepage = alloc_pages(flags, 0);
-	if (!ptepage)
-		return NULL;
-	if (!pgtable_page_ctor(ptepage)) {
-		__free_page(ptepage);
-		return NULL;
-	}
-	return ptepage;
+	return (pgtable_t)pte_fragment_alloc(mm, address, 0);
 }
 
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
-	free_page((unsigned long)pte);
+	pte_fragment_free((unsigned long *)pte, 1);
 }
 
 static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
 {
-	pgtable_page_dtor(ptepage);
-	__free_page(ptepage);
+	pte_fragment_free((unsigned long *)ptepage, 0);
 }
 
 static inline void pgtable_free(void *table, unsigned index_size)
 {
 	if (!index_size) {
-		pgtable_page_dtor(virt_to_page(table));
-		free_page((unsigned long)table);
+		pte_fragment_free((unsigned long *)table, 0);
 	} else {
 		BUG_ON(index_size > MAX_PGTABLE_INDEX_SIZE);
 		kmem_cache_free(PGT_CACHE(index_size), table);
@@ -155,6 +146,6 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
 				  unsigned long address)
 {
 	tlb_flush_pgtable(tlb, address);
-	pgtable_free_tlb(tlb, page_address(table), 0);
+	pgtable_free_tlb(tlb, table, 0);
 }
 #endif /* _ASM_POWERPC_PGALLOC_32_H */
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index d2908a8038e8..73e2b1fbdb36 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -336,12 +336,12 @@ static inline int pte_young(pte_t pte)
  */
 #ifndef CONFIG_BOOKE
 #define pmd_page_vaddr(pmd)	\
-	((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
+	((unsigned long)__va(pmd_val(pmd) & ~(PTE_TABLE_SIZE - 1)))
 #define pmd_page(pmd)	\
 	pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)
 #else
 #define pmd_page_vaddr(pmd)	\
-	((unsigned long) (pmd_val(pmd) & PAGE_MASK))
+	((unsigned long)(pmd_val(pmd) & ~(PTE_TABLE_SIZE - 1)))
 #define pmd_page(pmd)	\
 	pfn_to_page((__pa(pmd_val(pmd)) >> PAGE_SHIFT))
 #endif
@@ -360,7 +360,8 @@ static inline int pte_young(pte_t pte)
 	(pmd_bad(*(dir)) ? NULL : (pte_t *)pmd_page_vaddr(*(dir)) + \
 			   pte_index(addr))
 #define pte_offset_map(dir, addr) \
-	((pte_t *) kmap_atomic(pmd_page(*(dir))) + pte_index(addr))
+	((pte_t *)(kmap_atomic(pmd_page(*(dir))) + \
+		   (pmd_page_vaddr(*(dir)) & ~PAGE_MASK)) + pte_index(addr))
 #define pte_unmap(pte)	kunmap_atomic(pte)

 /*
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index f6a1265face2..27d1c16601ee 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -335,7 +335,7 @@ void arch_free_page(struct page *page, int order);
 #endif

 struct vm_area_struct;
-#ifdef CONFIG_PPC_BOOK3S_64
+#if !defined(CONFIG_PPC_BOOK3E_64) && !defined(CONFIG_PPC_BOOK3S_32)
 /*
ÂÂ * For BOOK3s 64 with 4k and 64K linux page size
ÂÂ * we want to use pointers, because the page table
@@ -343,12 +343,8 @@ struct vm_area_struct;
ÂÂ */
 typedef pte_t *pgtable_t;
 #else
-#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC64)
-typedef pte_t *pgtable_t;
-#else
 typedef struct page *pgtable_t;
 #endif
-#endif



Now that is getting complicated. Is there a way to move that to platform header instead of that complicated #if?

Ok, added two new patches for that in v5 (one distributes mmu-xxx.h in platform dirs, the other moves pgtable_t typedefs in relevant files)


 #include <asm-generic/memory_model.h>
 #endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 8b38f7730211..1865a3e4ab8c 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -94,12 +94,20 @@ unsigned long vmalloc_to_phys(void *vmalloc_addr);
 void pgtable_cache_add(unsigned int shift);
 void pgtable_cache_init(void);

+pte_t *early_alloc_pte(void);
+
 #if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_PPC32)
 void mark_initmem_nx(void);
 #else
 static inline void mark_initmem_nx(void) { }
 #endif

+#ifndef PTE_FRAG_NR
+#define PTE_FRAG_NR		1
+#define PTE_FRAG_SIZE_SHIFT	PAGE_SHIFT
+#define PTE_FRAG_SIZE		PAGE_SIZE
+#endif
+
+

IMHO we should avoid that. The #ifndef challenge is that we should always make sure the header inclusion is correct so that platform headers get included before. Why not move it to the platform that want to use pte fragmentation?

Ok, in v5 functions using it now defined static inline in platform headers so moved them there are well.



 #endif /* __ASSEMBLY__ */

 #endif /* _ASM_POWERPC_PGTABLE_H */
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index bd43b3ee52cb..e1deb15fe85e 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -18,6 +18,9 @@ obj-$(CONFIG_PPC_BOOK3E_64)	+= pgtable-book3e.o
 obj-$(CONFIG_PPC_BOOK3S_64)	+= pgtable-hash64.o hash_utils_64.o slb_low.o slb.o \
 				   $(hash64-y) mmu_context_book3s64.o pgtable-book3s64.o \
 				   pgtable-frag.o
+ifndef CONFIG_PPC_BOOK3S_32
+obj-$(CONFIG_PPC32)		+= pgtable-frag.o
+endif
 obj-$(CONFIG_PPC_RADIX_MMU)	+= pgtable-radix.o tlb-radix.o
 obj-$(CONFIG_PPC_STD_MMU_32)	+= ppc_mmu_32.o hash_low_32.o mmu_context_hash32.o
 obj-$(CONFIG_PPC_STD_MMU)	+= tlb_hash$(BITS).o
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index 4d80239ef83c..98f0ef463dc8 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -385,6 +385,7 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm)
 #endif
 	mm->context.id = MMU_NO_CONTEXT;
 	mm->context.active = 0;
+	mm->context.pte_frag = NULL;
 	return 0;
 }

diff --git a/arch/powerpc/mm/pgtable-frag.c b/arch/powerpc/mm/pgtable-frag.c
index ab4910e92aaf..d554a1cbc56d 100644
--- a/arch/powerpc/mm/pgtable-frag.c
+++ b/arch/powerpc/mm/pgtable-frag.c
@@ -30,6 +30,7 @@ static void pte_frag_destroy(void *pte_frag)
 	}
 }
 
+#ifdef CONFIG_PPC_BOOK3S_64
 static void pmd_frag_destroy(void *pmd_frag)
 {
 	int count;
@@ -44,6 +45,7 @@ static void pmd_frag_destroy(void *pmd_frag)
 		__free_page(page);
 	}
 }
+#endif
 
 static void destroy_pagetable_cache(struct mm_struct *mm)
 {
@@ -53,15 +55,18 @@ static void destroy_pagetable_cache(struct mm_struct *mm)
 	if (frag)
 		pte_frag_destroy(frag);
 
+#ifdef CONFIG_PPC_BOOK3S_64
 	frag = mm->context.pmd_frag;
 	if (frag)
 		pmd_frag_destroy(frag);
+#endif
 }
 
 void arch_exit_mmap(struct mm_struct *mm)
 {
 	destroy_pagetable_cache(mm);
 
+#ifdef CONFIG_PPC_BOOK3S_64
 	if (radix_enabled()) {
 		/*
 		 * Radix doesn't have a valid bit in the process table
@@ -79,6 +84,7 @@ void arch_exit_mmap(struct mm_struct *mm)
 		 */
 		process_tb[mm->context.id].prtb0 = 0;
 	}
+#endif
 }


is there a way to avoid all that #ifdef? May be redo the frag code such that we have few helpers that is platform independent?

Yes, in v5 reworked to keep platform specific arch_exit_mmap() and destroy_pagetable_cache().

Christophe


 static pte_t *get_pte_from_cache(struct mm_struct *mm)
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 7900b613e6e5..81e6b18d1955 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -195,12 +195,16 @@ EXPORT_SYMBOL(iounmap);
 static __init pte_t *early_pte_alloc_kernel(pmd_t *pmdp, unsigned long va)
 {
 	if (!pmd_present(*pmdp)) {
-		pte_t *ptep = __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE));
+		pte_t *ptep = __va(memblock_alloc(PTE_FRAG_SIZE, PTE_FRAG_SIZE));
 
 		if (!ptep)
 			return NULL;
 
-		clear_page(ptep);
+		if (PTE_FRAG_SIZE == PAGE_SIZE)
+			clear_page(ptep);
+		else
+			memset(ptep, 0, PTE_FRAG_SIZE);
+
 		pmd_populate_kernel(&init_mm, pmdp, ptep);
 	}
 	return pte_offset_kernel(pmdp, va);