[42/55] mm: thp: fix /dev/zero MAP_PRIVATE and vm_flags cleanups

From: Greg KH
Date: Fri Apr 29 2011 - 15:01:50 EST


2.6.38-stable review patch. If anyone has any objections, please let us know.

------------------

From: Andrea Arcangeli <aarcange@xxxxxxxxxx>

commit 78f11a255749d09025f54d4e2df4fbcb031530e2 upstream.

The huge_memory.c THP page fault was allowed to run if vm_ops was null
(which would succeed for /dev/zero MAP_PRIVATE, as the f_op->mmap wouldn't
setup a special vma->vm_ops and it would fallback to regular anonymous
memory) but other THP logics weren't fully activated for vmas with vm_file
not NULL (/dev/zero has a not NULL vma->vm_file).

So this removes the vm_file checks so that /dev/zero also can safely use
THP (the other albeit safer approach to fix this bug would have been to
prevent the THP initial page fault to run if vm_file was set).

After removing the vm_file checks, this also makes huge_memory.c stricter
in khugepaged for the DEBUG_VM=y case. It doesn't replace the vm_file
check with a is_pfn_mapping check (but it keeps checking for VM_PFNMAP
under VM_BUG_ON) because for a is_cow_mapping() mapping VM_PFNMAP should
only be allowed to exist before the first page fault, and in turn when
vma->anon_vma is null (so preventing khugepaged registration). So I tend
to think the previous comment saying if vm_file was set, VM_PFNMAP might
have been set and we could still be registered in khugepaged (despite
anon_vma was not NULL to be registered in khugepaged) was too paranoid.
The is_linear_pfn_mapping check is also I think superfluous (as described
by comment) but under DEBUG_VM it is safe to stay.

Addresses https://bugzilla.kernel.org/show_bug.cgi?id=33682

Signed-off-by: Andrea Arcangeli <aarcange@xxxxxxxxxx>
Reported-by: Caspar Zhang <bugs@xxxxxxxxxxxxxxx>
Acked-by: Mel Gorman <mel@xxxxxxxxx>
Acked-by: Rik van Riel <riel@xxxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Signed-off-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxx>

---
include/linux/huge_mm.h | 2 +-
include/linux/mm.h | 3 ++-
mm/huge_memory.c | 43 ++++++++++++++++++++++++-------------------
3 files changed, 27 insertions(+), 21 deletions(-)

--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -117,7 +117,7 @@ static inline void vma_adjust_trans_huge
unsigned long end,
long adjust_next)
{
- if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
+ if (!vma->anon_vma || vma->vm_ops)
return;
__vma_adjust_trans_huge(vma, start, end, adjust_next);
}
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -137,7 +137,8 @@ extern unsigned int kobjsize(const void
#define VM_RandomReadHint(v) ((v)->vm_flags & VM_RAND_READ)

/*
- * special vmas that are non-mergable, non-mlock()able
+ * Special vmas that are non-mergable, non-mlock()able.
+ * Note: mm/huge_memory.c VM_NO_THP depends on this definition.
*/
#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP)

--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1400,6 +1400,9 @@ out:
return ret;
}

+#define VM_NO_THP (VM_SPECIAL|VM_INSERTPAGE|VM_MIXEDMAP|VM_SAO| \
+ VM_HUGETLB|VM_SHARED|VM_MAYSHARE)
+
int hugepage_madvise(struct vm_area_struct *vma,
unsigned long *vm_flags, int advice)
{
@@ -1408,11 +1411,7 @@ int hugepage_madvise(struct vm_area_stru
/*
* Be somewhat over-protective like KSM for now!
*/
- if (*vm_flags & (VM_HUGEPAGE |
- VM_SHARED | VM_MAYSHARE |
- VM_PFNMAP | VM_IO | VM_DONTEXPAND |
- VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE |
- VM_MIXEDMAP | VM_SAO))
+ if (*vm_flags & (VM_HUGEPAGE | VM_NO_THP))
return -EINVAL;
*vm_flags &= ~VM_NOHUGEPAGE;
*vm_flags |= VM_HUGEPAGE;
@@ -1428,11 +1427,7 @@ int hugepage_madvise(struct vm_area_stru
/*
* Be somewhat over-protective like KSM for now!
*/
- if (*vm_flags & (VM_NOHUGEPAGE |
- VM_SHARED | VM_MAYSHARE |
- VM_PFNMAP | VM_IO | VM_DONTEXPAND |
- VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE |
- VM_MIXEDMAP | VM_SAO))
+ if (*vm_flags & (VM_NOHUGEPAGE | VM_NO_THP))
return -EINVAL;
*vm_flags &= ~VM_HUGEPAGE;
*vm_flags |= VM_NOHUGEPAGE;
@@ -1566,10 +1561,14 @@ int khugepaged_enter_vma_merge(struct vm
* page fault if needed.
*/
return 0;
- if (vma->vm_file || vma->vm_ops)
+ if (vma->vm_ops)
/* khugepaged not yet working on file or special mappings */
return 0;
- VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma));
+ /*
+ * If is_pfn_mapping() is true is_learn_pfn_mapping() must be
+ * true too, verify it here.
+ */
+ VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP);
hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
hend = vma->vm_end & HPAGE_PMD_MASK;
if (hstart < hend)
@@ -1818,12 +1817,15 @@ static void collapse_huge_page(struct mm
(vma->vm_flags & VM_NOHUGEPAGE))
goto out;

- /* VM_PFNMAP vmas may have vm_ops null but vm_file set */
- if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
+ if (!vma->anon_vma || vma->vm_ops)
goto out;
if (is_vma_temporary_stack(vma))
goto out;
- VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma));
+ /*
+ * If is_pfn_mapping() is true is_learn_pfn_mapping() must be
+ * true too, verify it here.
+ */
+ VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP);

pgd = pgd_offset(mm, address);
if (!pgd_present(*pgd))
@@ -2056,13 +2058,16 @@ static unsigned int khugepaged_scan_mm_s
progress++;
continue;
}
- /* VM_PFNMAP vmas may have vm_ops null but vm_file set */
- if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
+ if (!vma->anon_vma || vma->vm_ops)
goto skip;
if (is_vma_temporary_stack(vma))
goto skip;
-
- VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma));
+ /*
+ * If is_pfn_mapping() is true is_learn_pfn_mapping()
+ * must be true too, verify it here.
+ */
+ VM_BUG_ON(is_linear_pfn_mapping(vma) ||
+ vma->vm_flags & VM_NO_THP);

hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
hend = vma->vm_end & HPAGE_PMD_MASK;


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/