[RFC PATCH 04/16] mm: thp: 1GB THP copy on write implementation.

From: Zi Yan
Date: Wed Sep 02 2020 - 14:07:05 EST


From: Zi Yan <ziy@xxxxxxxxxx>

COW on 1GB THPs will fall back to 2MB THPs if 1GB THP is not available.

Signed-off-by: Zi Yan <ziy@xxxxxxxxxx>
---
arch/x86/include/asm/pgalloc.h | 9 ++++++
include/linux/huge_mm.h | 5 ++++
mm/huge_memory.c | 54 ++++++++++++++++++++++++++++++++++
mm/memory.c | 2 +-
mm/swapfile.c | 4 ++-
5 files changed, 72 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index fae13467d3e1..31221269c387 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -98,6 +98,26 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,

#define pmd_pgtable(pmd) pmd_page(pmd)

+/*
+ * Install the page table stored in the struct page @pte as the entry of @pud.
+ *
+ * The paravirt layer is told about the new table page through
+ * paravirt_alloc_pmd() before the entry is written with _PAGE_TABLE.
+ *
+ * NOTE(review): the page is announced with paravirt_alloc_pmd(), so it
+ * presumably holds a PMD-level table despite the parameter name - confirm.
+ */
+static inline void pud_populate_with_pgtable(struct mm_struct *mm, pud_t *pud,
+					     struct page *pte)
+{
+	unsigned long table_pfn = page_to_pfn(pte);
+	pud_t entry;
+
+	paravirt_alloc_pmd(mm, table_pfn);
+	entry = __pud(((pteval_t)table_pfn << PAGE_SHIFT) | _PAGE_TABLE);
+	set_pud(pud, entry);
+}
+
#if CONFIG_PGTABLE_LEVELS > 2
static inline pmd_t *pmd_alloc_one_page_with_ptes(struct mm_struct *mm, unsigned long addr)
{
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 7528652400e4..0c20a8ea6911 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -19,6 +19,7 @@ extern int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm,
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
extern void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud);
extern int do_huge_pud_anonymous_page(struct vm_fault *vmf);
+extern vm_fault_t do_huge_pud_wp_page(struct vm_fault *vmf, pud_t orig_pud);
#else
static inline void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud)
{
@@ -27,6 +28,11 @@ extern int do_huge_pud_anonymous_page(struct vm_fault *vmf)
{
return VM_FAULT_FALLBACK;
}
+/* Stub for !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD: always fall back. */
+static inline vm_fault_t do_huge_pud_wp_page(struct vm_fault *vmf, pud_t orig_pud)
+{
+	return VM_FAULT_FALLBACK;
+}
#endif

extern vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index ec3847392208..6da9b02501b7 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1334,6 +1334,80 @@ void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud)
unlock:
spin_unlock(vmf->ptl);
}
+
+/*
+ * do_huge_pud_wp_page - write-protect fault on an anonymous PUD-mapped THP
+ * @vmf: fault being handled; vmf->ptl is set to the lock covering vmf->pud
+ * @orig_pud: PUD value the caller observed before calling
+ *
+ * Reuses the mapped page in place when reuse_swap_page() permits it, making
+ * the entry young, dirty and (subject to the VMA) writable.  Otherwise, and
+ * for the huge zero PUD, the mapping is split with __split_huge_pud() and the
+ * caller is told to fall back.
+ *
+ * Return: 0 if *vmf->pud no longer matches @orig_pud, VM_FAULT_WRITE if the
+ * page was reused, VM_FAULT_FALLBACK after the PUD has been split.
+ */
+vm_fault_t do_huge_pud_wp_page(struct vm_fault *vmf, pud_t orig_pud)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct page *page;
+	unsigned long haddr = vmf->address & HPAGE_PUD_MASK;
+
+	vmf->ptl = pud_lockptr(vma->vm_mm, vmf->pud);
+	VM_BUG_ON_VMA(!vma->anon_vma, vma);
+
+	if (is_huge_zero_pud(orig_pud))
+		goto fallback;
+
+	spin_lock(vmf->ptl);
+
+	if (unlikely(!pud_same(*vmf->pud, orig_pud))) {
+		spin_unlock(vmf->ptl);
+		return 0;
+	}
+
+	page = pud_page(orig_pud);
+	VM_BUG_ON_PAGE(!PageCompound(page) || !PageHead(page), page);
+
+	/* Lock page for reuse_swap_page() */
+	if (!trylock_page(page)) {
+		/*
+		 * lock_page() may sleep, so drop the spinlock first; the extra
+		 * reference keeps the page alive while no lock is held.
+		 */
+		get_page(page);
+		spin_unlock(vmf->ptl);
+		lock_page(page);
+		spin_lock(vmf->ptl);
+		/* The PUD may have changed while the spinlock was dropped. */
+		if (unlikely(!pud_same(*vmf->pud, orig_pud))) {
+			spin_unlock(vmf->ptl);
+			unlock_page(page);
+			put_page(page);
+			return 0;
+		}
+		put_page(page);
+	}
+	if (reuse_swap_page(page, NULL)) {
+		pud_t entry;
+
+		entry = pud_mkyoung(orig_pud);
+		entry = maybe_pud_mkwrite(pud_mkdirty(entry), vma);
+		if (pudp_set_access_flags(vma, haddr, vmf->pud, entry, 1))
+			update_mmu_cache_pud(vma, vmf->address, vmf->pud);
+		unlock_page(page);
+		spin_unlock(vmf->ptl);
+		return VM_FAULT_WRITE;
+	}
+	unlock_page(page);
+	spin_unlock(vmf->ptl);
+fallback:
+	/* The page cannot be reused: split the PUD and let the caller fall back. */
+	__split_huge_pud(vma, vmf->pud, vmf->address);
+	return VM_FAULT_FALLBACK;
+}
+
#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */

void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd)
diff --git a/mm/memory.c b/mm/memory.c
index 6f86294438fd..b88587256bc1 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4165,7 +4165,7 @@ static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/* No support for anonymous transparent PUD pages yet */
if (vma_is_anonymous(vmf->vma))
- return VM_FAULT_FALLBACK;
+ return do_huge_pud_wp_page(vmf, orig_pud);
if (vmf->vma->vm_ops->huge_fault)
return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD);
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 20012c0c0252..e3f771c2ad83 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1635,7 +1635,9 @@ static int page_trans_huge_map_swapcount(struct page *page, int *total_mapcount,
/* hugetlbfs shouldn't call it */
VM_BUG_ON_PAGE(PageHuge(page), page);

- if (!IS_ENABLED(CONFIG_THP_SWAP) || likely(!PageTransCompound(page))) {
+ if (!IS_ENABLED(CONFIG_THP_SWAP) ||
+ unlikely(compound_order(compound_head(page)) == HPAGE_PUD_ORDER) ||
+ likely(!PageTransCompound(page))) {
mapcount = page_trans_huge_mapcount(page, total_mapcount);
if (PageSwapCache(page))
swapcount = page_swapcount(page);
--
2.28.0