Re: [PATCHv3 14/24] thp: implement new split_huge_page()

From: Kirill A. Shutemov
Date: Mon Feb 16 2015 - 10:58:07 EST


On Thu, Feb 12, 2015 at 02:24:40PM -0500, Sasha Levin wrote:
> On 02/12/2015 12:07 PM, Sasha Levin wrote:
> > On 02/12/2015 11:18 AM, Kirill A. Shutemov wrote:
> >> > +void __get_page_tail(struct page *page);
> >> >  static inline void get_page(struct page *page)
> >> >  {
> >> > -	struct page *page_head = compound_head(page);
> >> > -	VM_BUG_ON_PAGE(atomic_read(&page_head->_count) <= 0, page);
> >> > -	atomic_inc(&page_head->_count);
> >> > +	if (unlikely(PageTail(page)))
> >> > +		return __get_page_tail(page);
> >> > +
> >> > +	/*
> >> > +	 * Getting a normal page or the head of a compound page
> >> > +	 * requires to already have an elevated page->_count.
> >> > +	 */
> >> > +	VM_BUG_ON_PAGE(atomic_read(&page->_count) <= 0, page);
> > This BUG_ON seems to get hit:
>
> Plus a few more different traces:

Sasha, could you check whether the patch below makes things any better?

diff --git a/mm/gup.c b/mm/gup.c
index 22585ef667d9..10d98d39bc03 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -211,12 +211,19 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
 	if (flags & FOLL_SPLIT) {
 		int ret;
 		page = pmd_page(*pmd);
-		get_page(page);
-		spin_unlock(ptl);
-		lock_page(page);
-		ret = split_huge_page(page);
-		unlock_page(page);
-		put_page(page);
+		if (is_huge_zero_page(page)) {
+			spin_unlock(ptl);
+			ret = 0;
+			split_huge_pmd(vma, pmd, address);
+		} else {
+			get_page(page);
+			spin_unlock(ptl);
+			lock_page(page);
+			ret = split_huge_page(page);
+			unlock_page(page);
+			put_page(page);
+		}
+
 		return ret ? ERR_PTR(ret) :
 			follow_page_pte(vma, address, pmd, flags);
 	}
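
For context: the huge zero page needs its own branch here because it is a single, global, read-only page; it cannot be split into independently refcounted tail pages, so split_huge_page() does not apply and only the page-table mapping gets taken apart with split_huge_pmd(). The test itself is cheap. A minimal sketch of the idea, not necessarily the exact helper in this series:

	/* Sketch only: the huge zero page is one global page, so
	 * recognising it amounts to a pointer comparison. */
	static inline bool is_huge_zero_page(struct page *page)
	{
		return ACCESS_ONCE(huge_zero_page) == page;
	}
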
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 2667938a3d2c..4d69baa41a6c 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1821,7 +1821,7 @@ static int __split_huge_page_refcount(struct anon_vma *anon_vma,
 	int tail_mapcount = 0;
 
 	freeze_page(anon_vma, page);
-	BUG_ON(compound_mapcount(page));
+	VM_BUG_ON_PAGE(compound_mapcount(page), page);
 
 	/* prevent PageLRU to go away from under us, and freeze lru stats */
 	spin_lock_irq(&zone->lru_lock);
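
The hunk above demotes a BUG_ON() to VM_BUG_ON_PAGE(): the check now compiles away on !CONFIG_DEBUG_VM builds, and when it does trigger it dumps the offending page's state rather than only an oops. Roughly what the macro expands to with CONFIG_DEBUG_VM (paraphrasing include/linux/mmdebug.h of that era):

	#define VM_BUG_ON_PAGE(cond, page)				\
		do {							\
			if (unlikely(cond)) {				\
				dump_page(page,				\
					"VM_BUG_ON_PAGE(" __stringify(cond)")"); \
				BUG();					\
			}						\
		} while (0)
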
diff --git a/mm/memory.c b/mm/memory.c
index f81bcd539ca0..5153fd0d8e5c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2231,7 +2231,7 @@ unlock:
 	pte_unmap_unlock(page_table, ptl);
 	if (mmun_end > mmun_start)
 		mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
-	if (old_page) {
+	if (old_page && !PageTransCompound(old_page)) {
 		/*
 		 * Don't let another task, with possibly unlocked vma,
 		 * keep the mlocked page.
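
The mm/memory.c change makes do_wp_page()'s mlock fixup (munlocking old_page once the COW copy is in place) apply only to small pages; compound pages are skipped. PageTransCompound() keeps that check cheap: with THP configured in it is simply a compound-page test, roughly (paraphrasing include/linux/page-flags.h):

	static inline int PageTransCompound(struct page *page)
	{
		return PageCompound(page);
	}

and it evaluates to 0 when CONFIG_TRANSPARENT_HUGEPAGE is off.
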
diff --git a/mm/mlock.c b/mm/mlock.c
index 40c6ab590cde..6afef15f80ab 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -502,39 +502,26 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
 		page = follow_page_mask(vma, start, FOLL_GET | FOLL_DUMP,
 				&page_mask);
 
-		if (page && !IS_ERR(page)) {
-			if (PageTransHuge(page)) {
-				lock_page(page);
-				/*
-				 * Any THP page found by follow_page_mask() may
-				 * have gotten split before reaching
-				 * munlock_vma_page(), so we need to recompute
-				 * the page_mask here.
-				 */
-				page_mask = munlock_vma_page(page);
-				unlock_page(page);
-				put_page(page); /* follow_page_mask() */
-			} else {
-				/*
-				 * Non-huge pages are handled in batches via
-				 * pagevec. The pin from follow_page_mask()
-				 * prevents them from collapsing by THP.
-				 */
-				pagevec_add(&pvec, page);
-				zone = page_zone(page);
-				zoneid = page_zone_id(page);
+		if (page && !IS_ERR(page) && !PageTransCompound(page)) {
+			/*
+			 * Non-huge pages are handled in batches via
+			 * pagevec. The pin from follow_page_mask()
+			 * prevents them from collapsing by THP.
+			 */
+			pagevec_add(&pvec, page);
+			zone = page_zone(page);
+			zoneid = page_zone_id(page);
 
-				/*
-				 * Try to fill the rest of pagevec using fast
-				 * pte walk. This will also update start to
-				 * the next page to process. Then munlock the
-				 * pagevec.
-				 */
-				start = __munlock_pagevec_fill(&pvec, vma,
-						zoneid, start, end);
-				__munlock_pagevec(&pvec, zone);
-				goto next;
-			}
+			/*
+			 * Try to fill the rest of pagevec using fast
+			 * pte walk. This will also update start to
+			 * the next page to process. Then munlock the
+			 * pagevec.
+			 */
+			start = __munlock_pagevec_fill(&pvec, vma,
+					zoneid, start, end);
+			__munlock_pagevec(&pvec, zone);
+			goto next;
 		}
 		/* It's a bug to munlock in the middle of a THP page */
 		VM_BUG_ON((start >> PAGE_SHIFT) & page_mask);
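
With the THP special case gone, munlock_vma_pages_range() is left with only the batched small-page path. It follows the usual pagevec pattern: accumulate page pointers, then drain them in one go so the zone's lru_lock is taken once per batch instead of once per page. Schematically (illustrative only; next_page() and drain_batch() are hypothetical stand-ins, the latter for __munlock_pagevec(), which also empties the pagevec):

	struct pagevec pvec;
	struct page *page;

	pagevec_init(&pvec, 0);			/* 0: pages are not cache-cold */
	while ((page = next_page()) != NULL) {
		if (!pagevec_add(&pvec, page))	/* returns slots left; 0 == full */
			drain_batch(&pvec);
	}
	if (pagevec_count(&pvec))		/* drain the final partial batch */
		drain_batch(&pvec);
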
--
Kirill A. Shutemov