Re: hugetlb demand paging patch part [2/3]

From: 'David Gibson'
Date: Thu Apr 15 2004 - 21:44:00 EST


On Thu, Apr 15, 2004 at 10:27:58AM -0700, Chen, Kenneth W wrote:
> >>>> David Gibson wrote on Thursday, April 15, 2004 12:17 AM
> > > diff -Nurp linux-2.6.5/mm/memory.c linux-2.6.5.htlb/mm/memory.c
> > > +++ linux-2.6.5.htlb/mm/memory.c 2004-04-13 12:02:31.000000000 -0700
> > > @@ -769,11 +769,6 @@ int get_user_pages(struct task_struct *t
> > > if ((pages && vm_io) || !(flags & vma->vm_flags))
> > > return i ? : -EFAULT;
> > >
> > > - if (is_vm_hugetlb_page(vma)) {
> > > - i = follow_hugetlb_page(mm, vma, pages, vmas,
> > > - &start, &len, i);
> > > - continue;
> > > - }
> > > spin_lock(&mm->page_table_lock);
> > > do {
> > > struct page *map = NULL;
> >
> > Ok, I notice that you've removed the follow_hugtlb_page() function
> > (and from the arch specific stuff, as well). As far as I can tell,
> > this isn't actually related to demand-paging, in fact as far as I can
> > tell this function is unnecessary
>
> That was the reason I removed the function because it is no longer used
> with demand paging.

Yes, but what I'm saying is as far as I can tell it shouldn't be
necessary even *without* demand paging. Again I'm trying to separate
cleanups from new features in your patches.

> > should already work for huge pages. In particular the path in
> > get_user_pages() which can call handle_mm_fault() (which won't work on
> > hugepages without your patch) should never get triggered, since
> > hugepages are all prefaulted.
>
> > Does that sound right? In other words, do you think the patch below,
> > which just kills off follow_hugetlb_page() is safe, or have I missed
> > something?
> >
> > Index: working-2.6/mm/memory.c
> > ===================================================================
> > +++ working-2.6/mm/memory.c 2004-04-15 17:03:01.421905400 +1000
> > @@ -766,16 +766,13 @@
> > [snip]
> > spin_lock(&mm->page_table_lock);
> > do {
> > struct page *map;
> > int lookup_write = write;
> > while (!(map = follow_page(mm, start, lookup_write))) {
> > + /* hugepages should always be prefaulted */
> > + BUG_ON(is_vm_hugetlb_page(vma));
> > /*
> > * Shortcut for anonymous pages. We don't want
> > * to force the creation of pages tables for
>
> This portion is incorrect, because it will trigger BUG_ON all the time
> for faulting hugetlb page.

Ah, yes, I did that because I was (and am) thinking of the case
without demand paging. But I just realised that of course we can get
a fault in that case, if there's a mapping truncation at the wrong
time. Removing the BUG_ON does mean that its significant that the
never-called hugetlb nopage function is non-NULL, so that
untouched_anonymous_page() returns 0 before looking at the page
tables, which is a bit more subtle than I'd like. Nontheless, revised
patch below.

> Yes, killing follow_hugetlb_page() is safe because follow_page() takes
> care of hugetlb page. See 2nd patch posted earlier in
> hugetlb_demanding_generic.patch

Yes, I looked at it already. But what I'm asking about is applying
this patch *without* (or before) going to demand paging.

Index: working-2.6/mm/memory.c
===================================================================
--- working-2.6.orig/mm/memory.c 2004-04-13 11:42:42.000000000 +1000
+++ working-2.6/mm/memory.c 2004-04-16 11:46:31.935870496 +1000
@@ -766,16 +766,13 @@
|| !(flags & vma->vm_flags))
return i ? : -EFAULT;

- if (is_vm_hugetlb_page(vma)) {
- i = follow_hugetlb_page(mm, vma, pages, vmas,
- &start, &len, i);
- continue;
- }
spin_lock(&mm->page_table_lock);
do {
struct page *map;
int lookup_write = write;
while (!(map = follow_page(mm, start, lookup_write))) {
+ /* hugepages should always be prefaulted */
+ BUG_ON(is_vm_hugetlb_page(vma));
/*
* Shortcut for anonymous pages. We don't want
* to force the creation of pages tables for
Index: working-2.6/include/linux/hugetlb.h
===================================================================
--- working-2.6.orig/include/linux/hugetlb.h 2004-04-13 11:42:41.000000000 +1000
+++ working-2.6/include/linux/hugetlb.h 2004-04-16 11:46:31.947868672 +1000
@@ -12,7 +12,6 @@

int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void *, size_t *);
int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
-int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int);
void zap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long);
void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long);
int hugetlb_prefault(struct address_space *, struct vm_area_struct *);
@@ -64,7 +63,6 @@
return 0;
}

-#define follow_hugetlb_page(m,v,p,vs,a,b,i) ({ BUG(); 0; })
#define follow_huge_addr(mm, vma, addr, write) 0
#define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; })
#define hugetlb_prefault(mapping, vma) ({ BUG(); 0; })
Index: working-2.6/arch/ppc64/mm/hugetlbpage.c
===================================================================
--- working-2.6.orig/arch/ppc64/mm/hugetlbpage.c 2004-04-13 11:42:35.000000000 +1000
+++ working-2.6/arch/ppc64/mm/hugetlbpage.c 2004-04-16 11:46:31.950868216 +1000
@@ -288,52 +288,6 @@
return 0;
}

-int
-follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
- struct page **pages, struct vm_area_struct **vmas,
- unsigned long *position, int *length, int i)
-{
- unsigned long vpfn, vaddr = *position;
- int remainder = *length;
-
- WARN_ON(!is_vm_hugetlb_page(vma));
-
- vpfn = vaddr/PAGE_SIZE;
- while (vaddr < vma->vm_end && remainder) {
- BUG_ON(!in_hugepage_area(mm->context, vaddr));
-
- if (pages) {
- hugepte_t *pte;
- struct page *page;
-
- pte = hugepte_offset(mm, vaddr);
-
- /* hugetlb should be locked, and hence, prefaulted */
- WARN_ON(!pte || hugepte_none(*pte));
-
- page = &hugepte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
-
- WARN_ON(!PageCompound(page));
-
- get_page(page);
- pages[i] = page;
- }
-
- if (vmas)
- vmas[i] = vma;
-
- vaddr += PAGE_SIZE;
- ++vpfn;
- --remainder;
- ++i;
- }
-
- *length = remainder;
- *position = vaddr;
-
- return i;
-}
-
struct page *
follow_huge_addr(struct mm_struct *mm,
struct vm_area_struct *vma, unsigned long address, int write)
Index: working-2.6/arch/i386/mm/hugetlbpage.c
===================================================================
--- working-2.6.orig/arch/i386/mm/hugetlbpage.c 2004-04-13 11:42:35.000000000 +1000
+++ working-2.6/arch/i386/mm/hugetlbpage.c 2004-04-16 11:49:11.914912248 +1000
@@ -93,65 +93,27 @@
return -ENOMEM;
}

-int
-follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
- struct page **pages, struct vm_area_struct **vmas,
- unsigned long *position, int *length, int i)
-{
- unsigned long vpfn, vaddr = *position;
- int remainder = *length;
-
- WARN_ON(!is_vm_hugetlb_page(vma));
-
- vpfn = vaddr/PAGE_SIZE;
- while (vaddr < vma->vm_end && remainder) {
-
- if (pages) {
- pte_t *pte;
- struct page *page;
-
- pte = huge_pte_offset(mm, vaddr);
-
- /* hugetlb should be locked, and hence, prefaulted */
- WARN_ON(!pte || pte_none(*pte));
-
- page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
-
- WARN_ON(!PageCompound(page));
-
- get_page(page);
- pages[i] = page;
- }
-
- if (vmas)
- vmas[i] = vma;
-
- vaddr += PAGE_SIZE;
- ++vpfn;
- --remainder;
- ++i;
- }
-
- *length = remainder;
- *position = vaddr;
-
- return i;
-}
-
#if 0 /* This is just for testing */
struct page *
follow_huge_addr(struct mm_struct *mm,
struct vm_area_struct *vma, unsigned long address, int write)
{
- unsigned long start = address;
- int length = 1;
- int nr;
+ int vpfn = vaddr / PAGE_SIZE;
struct page *page;
+ pte_t *pte;

- nr = follow_hugetlb_page(mm, vma, &page, NULL, &start, &length, 0);
- if (nr == 1)
- return page;
- return NULL;
+ pte = huge_pte_offset(mm, address);
+
+ /* PTE could be absent if there's been a mapping truncation */
+ if (! pte || pte_none(*pte))
+ return NULL;
+
+ page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
+
+ WARN_ON(!PageCompound(page));
+
+ get_page(page);
+ return page;
}

/*
Index: working-2.6/arch/sparc64/mm/hugetlbpage.c
===================================================================
--- working-2.6.orig/arch/sparc64/mm/hugetlbpage.c 2004-04-13 11:42:35.000000000 +1000
+++ working-2.6/arch/sparc64/mm/hugetlbpage.c 2004-04-16 11:46:31.961866544 +1000
@@ -121,47 +121,6 @@
return -ENOMEM;
}

-int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
- struct page **pages, struct vm_area_struct **vmas,
- unsigned long *position, int *length, int i)
-{
- unsigned long vaddr = *position;
- int remainder = *length;
-
- WARN_ON(!is_vm_hugetlb_page(vma));
-
- while (vaddr < vma->vm_end && remainder) {
- if (pages) {
- pte_t *pte;
- struct page *page;
-
- pte = huge_pte_offset(mm, vaddr);
-
- /* hugetlb should be locked, and hence, prefaulted */
- BUG_ON(!pte || pte_none(*pte));
-
- page = pte_page(*pte);
-
- WARN_ON(!PageCompound(page));
-
- get_page(page);
- pages[i] = page;
- }
-
- if (vmas)
- vmas[i] = vma;
-
- vaddr += PAGE_SIZE;
- --remainder;
- ++i;
- }
-
- *length = remainder;
- *position = vaddr;
-
- return i;
-}
-
struct page *follow_huge_addr(struct mm_struct *mm,
struct vm_area_struct *vma,
unsigned long address, int write)
Index: working-2.6/arch/ia64/mm/hugetlbpage.c
===================================================================
--- working-2.6.orig/arch/ia64/mm/hugetlbpage.c 2004-04-14 12:22:48.000000000 +1000
+++ working-2.6/arch/ia64/mm/hugetlbpage.c 2004-04-16 11:46:31.963866240 +1000
@@ -113,43 +113,6 @@
return -ENOMEM;
}

-int
-follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
- struct page **pages, struct vm_area_struct **vmas,
- unsigned long *st, int *length, int i)
-{
- pte_t *ptep, pte;
- unsigned long start = *st;
- unsigned long pstart;
- int len = *length;
- struct page *page;
-
- do {
- pstart = start & HPAGE_MASK;
- ptep = huge_pte_offset(mm, start);
- pte = *ptep;
-
-back1:
- page = pte_page(pte);
- if (pages) {
- page += ((start & ~HPAGE_MASK) >> PAGE_SHIFT);
- get_page(page);
- pages[i] = page;
- }
- if (vmas)
- vmas[i] = vma;
- i++;
- len--;
- start += PAGE_SIZE;
- if (((start & HPAGE_MASK) == pstart) && len &&
- (start < vma->vm_end))
- goto back1;
- } while (len && start < vma->vm_end);
- *length = len;
- *st = start;
- return i;
-}
-
struct vm_area_struct *hugepage_vma(struct mm_struct *mm, unsigned long addr)
{
if (mm->used_hugetlb) {
Index: working-2.6/arch/sh/mm/hugetlbpage.c
===================================================================
--- working-2.6.orig/arch/sh/mm/hugetlbpage.c 2004-04-13 11:42:35.000000000 +1000
+++ working-2.6/arch/sh/mm/hugetlbpage.c 2004-04-16 11:46:31.971865024 +1000
@@ -124,47 +124,6 @@
return -ENOMEM;
}

-int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
- struct page **pages, struct vm_area_struct **vmas,
- unsigned long *position, int *length, int i)
-{
- unsigned long vaddr = *position;
- int remainder = *length;
-
- WARN_ON(!is_vm_hugetlb_page(vma));
-
- while (vaddr < vma->vm_end && remainder) {
- if (pages) {
- pte_t *pte;
- struct page *page;
-
- pte = huge_pte_offset(mm, vaddr);
-
- /* hugetlb should be locked, and hence, prefaulted */
- BUG_ON(!pte || pte_none(*pte));
-
- page = pte_page(*pte);
-
- WARN_ON(!PageCompound(page));
-
- get_page(page);
- pages[i] = page;
- }
-
- if (vmas)
- vmas[i] = vma;
-
- vaddr += PAGE_SIZE;
- --remainder;
- ++i;
- }
-
- *length = remainder;
- *position = vaddr;
-
- return i;
-}
-
struct page *follow_huge_addr(struct mm_struct *mm,
struct vm_area_struct *vma,
unsigned long address, int write)

--
David Gibson | For every complex problem there is a
david AT gibson.dropbear.id.au | solution which is simple, neat and
| wrong.
http://www.ozlabs.org/people/dgibson
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/