Re: [PATCH v4 20/36] powerpc: Implement the new page table range API

From: Christophe Leroy
Date: Sat Mar 18 2023 - 05:19:31 EST




Le 17/03/2023 à 04:47, Matthew Wilcox a écrit :
> On Wed, Mar 15, 2023 at 10:18:22AM +0000, Christophe Leroy wrote:
>> I investigated a bit further and can confirm now that the above won't
>> always work, see comment
>> https://elixir.bootlin.com/linux/v6.3-rc2/source/arch/powerpc/include/asm/nohash/32/pgtable.h#L147
>>
>> And then you see
>> https://elixir.bootlin.com/linux/v6.3-rc2/source/arch/powerpc/include/asm/nohash/pte-e500.h#L63
>
> Got it. Here's what I intend to fold in for the next version:
>
> diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
> index 7bf1fe7297c6..5f12b9382909 100644
> --- a/arch/powerpc/include/asm/book3s/32/pgtable.h
> +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
> @@ -462,11 +462,6 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
> pgprot_val(pgprot));
> }
>
> -static inline unsigned long pte_pfn(pte_t pte)
> -{
> - return pte_val(pte) >> PTE_RPN_SHIFT;
> -}
> -
> /* Generic modifiers for PTE bits */
> static inline pte_t pte_wrprotect(pte_t pte)
> {
> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
> index 4acc9690f599..c5baa3082a5a 100644
> --- a/arch/powerpc/include/asm/book3s/64/pgtable.h
> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
> @@ -104,6 +104,7 @@
> * and every thing below PAGE_SHIFT;
> */
> #define PTE_RPN_MASK (((1UL << _PAGE_PA_MAX) - 1) & (PAGE_MASK))
> +#define PTE_RPN_SHIFT PAGE_SHIFT
> /*
> * set of bits not changed in pmd_modify. Even though we have hash specific bits
> * in here, on radix we expect them to be zero.
> @@ -569,11 +570,6 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
> return __pte(((pte_basic_t)pfn << PAGE_SHIFT) | pgprot_val(pgprot) | _PAGE_PTE);
> }
>
> -static inline unsigned long pte_pfn(pte_t pte)
> -{
> - return (pte_val(pte) & PTE_RPN_MASK) >> PAGE_SHIFT;
> -}
> -
> /* Generic modifiers for PTE bits */
> static inline pte_t pte_wrprotect(pte_t pte)
> {
> diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h
> index 69a7dd47a9f0..03be8b22aaea 100644
> --- a/arch/powerpc/include/asm/nohash/pgtable.h
> +++ b/arch/powerpc/include/asm/nohash/pgtable.h
> @@ -101,8 +101,6 @@ static inline bool pte_access_permitted(pte_t pte, bool write)
> static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) {
> return __pte(((pte_basic_t)(pfn) << PTE_RPN_SHIFT) |
> pgprot_val(pgprot)); }
> -static inline unsigned long pte_pfn(pte_t pte) {
> - return pte_val(pte) >> PTE_RPN_SHIFT; }
>
> /* Generic modifiers for PTE bits */
> static inline pte_t pte_exprotect(pte_t pte)
> @@ -279,7 +277,7 @@ static inline int pud_huge(pud_t pud)
> void update_mmu_cache_range(struct vm_area_struct *vma, unsigned long address,
> pte_t *ptep, unsigned int nr);
> #else
> -static inline void update_mmu_cache(struct vm_area_struct *vma,
> +static inline void update_mmu_cache_range(struct vm_area_struct *vma,
> unsigned long address, pte_t *ptep, unsigned int nr) {}
> #endif
>
> diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
> index 656ecf2b10cd..491a2720f835 100644
> --- a/arch/powerpc/include/asm/pgtable.h
> +++ b/arch/powerpc/include/asm/pgtable.h
> @@ -54,6 +54,12 @@ void set_ptes(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
> /* Keep these as a macros to avoid include dependency mess */
> #define pte_page(x) pfn_to_page(pte_pfn(x))
> #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
> +
> +static inline unsigned long pte_pfn(pte_t pte)
> +{
> + return (pte_val(pte) & PTE_RPN_MASK) >> PTE_RPN_SHIFT;
> +}
> +
> /*
> * Select all bits except the pfn
> */
> diff --git a/arch/powerpc/mm/nohash/e500_hugetlbpage.c b/arch/powerpc/mm/nohash/e500_hugetlbpage.c
> index f3cb91107a47..583b3098763f 100644
> --- a/arch/powerpc/mm/nohash/e500_hugetlbpage.c
> +++ b/arch/powerpc/mm/nohash/e500_hugetlbpage.c
> @@ -178,7 +178,7 @@ book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea, pte_t pte)
> *
> * This must always be called with the pte lock held.
> */
> -void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
> +void update_mmu_cache_range(struct vm_area_struct *vma, unsigned long address,
> pte_t *ptep, unsigned int nr)
> {
> if (is_vm_hugetlb_page(vma))
> diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
> index b3c7b874a7a2..db236b494845 100644
> --- a/arch/powerpc/mm/pgtable.c
> +++ b/arch/powerpc/mm/pgtable.c
> @@ -208,7 +208,7 @@ void set_ptes(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
> if (--nr == 0)
> break;
> ptep++;
> - pte = __pte(pte_val(pte) + PAGE_SIZE);
> + pte = __pte(pte_val(pte) + (1UL << PTE_RPN_SHIFT));
> addr += PAGE_SIZE;
> }
> }


What about:

void set_ptes(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
pte_t pte, unsigned int nr)
{
pgprot_t prot;
unsigned long pfn;
/*
* Make sure hardware valid bit is not set. We don't do
* tlb flush for this update.
*/
VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep));

/* Note: mm->context.id might not yet have been assigned as
* this context might not have been activated yet when this
* is called.
*/
pte = set_pte_filter(pte);

prot = pte_pgprot(pte);
pfn = pte_pfn(pte);
/* Perform the setting of the PTE */
for (;;) {
__set_pte_at(mm, addr, ptep, pfn_pte(pfn, prot), 0);
if (--nr == 0)
break;
ptep++;
pfn++;
addr += PAGE_SIZE;
}
}


Christophe