[PATCH v3 2/3] mm: hugetlb: introduce PageHugeActive flag

From: Naoya Horiguchi
Date: Tue Mar 31 2015 - 04:54:10 EST


isolate_huge_page() is not safe against being called concurrently on the same
hugepage, which can leave the victim hugepage in an invalid state and result
in BUG_ON().

The root problem is that struct page carries no easily accessible information
about a hugepage's activeness. Note that a hugepage's activeness just means
being linked to hstate->hugepage_activelist, which is not the same as a normal
page's activeness as represented by the PageActive flag.

Normal pages are isolated by isolate_lru_page() which prechecks PageLRU before
isolation, so let's do similarly for hugetlb with a new PageHugeActive flag.

Set/ClearPageHugeActive should be called with hugetlb_lock held. hugetlb_cow()
and hugetlb_no_page() don't do this, which is justified because in these
functions SetPageHugeActive is called right after the hugepage is allocated,
when no other thread tries to isolate it.

Signed-off-by: Naoya Horiguchi <n-horiguchi@xxxxxxxxxxxxx>
---
ChangeLog v2->v3:
- Use PagePrivate of the first tail page to show hugepage's activeness instead
of PageLRU
- drop ClearPageLRU in dequeue_hwpoisoned_huge_page() (which was wrong)
- fix return value of isolate_huge_page() (using ret)
- move __put_compound_page() part to a separate patch
- drop "Cc: stable" tag because this is not a simple fix

ChangeLog v1->v2:
- call isolate_huge_page() in soft_offline_huge_page() instead of list_move()
---
mm/hugetlb.c | 41 ++++++++++++++++++++++++++++++++++++++---
mm/memory-failure.c | 14 ++++++++++++--
2 files changed, 50 insertions(+), 5 deletions(-)

diff --git v4.0-rc6.orig/mm/hugetlb.c v4.0-rc6/mm/hugetlb.c
index c41b2a0ee273..05e0233d30d7 100644
--- v4.0-rc6.orig/mm/hugetlb.c
+++ v4.0-rc6/mm/hugetlb.c
@@ -855,6 +855,31 @@ struct hstate *size_to_hstate(unsigned long size)
return NULL;
}

+/*
+ * Page flag to show that the hugepage is "active/in-use" (i.e. being linked to
+ * hstate->hugepage_activelist.)
+ *
+ * This function can be called for tail pages, but never returns true for them.
+ */
+int PageHugeActive(struct page *page)
+{
+ VM_BUG_ON_PAGE(!PageHuge(page), page);
+ return PageHead(page) && PagePrivate(&page[1]);
+}
+
+/* never called for tail page */
+void SetPageHugeActive(struct page *page)
+{
+ VM_BUG_ON_PAGE(!PageHeadHuge(page), page);
+ SetPagePrivate(&page[1]);
+}
+
+void ClearPageHugeActive(struct page *page)
+{
+ VM_BUG_ON_PAGE(!PageHeadHuge(page), page);
+ ClearPagePrivate(&page[1]);
+}
+
void free_huge_page(struct page *page)
{
/*
@@ -875,6 +900,7 @@ void free_huge_page(struct page *page)
ClearPagePrivate(page);

spin_lock(&hugetlb_lock);
+ ClearPageHugeActive(page);
hugetlb_cgroup_uncharge_page(hstate_index(h),
pages_per_huge_page(h), page);
if (restore_reserve)
@@ -2891,6 +2917,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
copy_user_huge_page(new_page, old_page, address, vma,
pages_per_huge_page(h));
__SetPageUptodate(new_page);
+ SetPageHugeActive(new_page);

mmun_start = address & huge_page_mask(h);
mmun_end = mmun_start + huge_page_size(h);
@@ -3003,6 +3030,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
}
clear_huge_page(page, address, pages_per_huge_page(h));
__SetPageUptodate(page);
+ SetPageHugeActive(page);

if (vma->vm_flags & VM_MAYSHARE) {
int err;
@@ -3812,19 +3840,26 @@ int dequeue_hwpoisoned_huge_page(struct page *hpage)

bool isolate_huge_page(struct page *page, struct list_head *list)
{
+ bool ret = true;
+
VM_BUG_ON_PAGE(!PageHead(page), page);
- if (!get_page_unless_zero(page))
- return false;
spin_lock(&hugetlb_lock);
+ if (!PageHugeActive(page) || !get_page_unless_zero(page)) {
+ ret = false;
+ goto unlock;
+ }
+ ClearPageHugeActive(page);
list_move_tail(&page->lru, list);
+unlock:
spin_unlock(&hugetlb_lock);
- return true;
+ return ret;
}

void putback_active_hugepage(struct page *page)
{
VM_BUG_ON_PAGE(!PageHead(page), page);
spin_lock(&hugetlb_lock);
+ SetPageHugeActive(page);
list_move_tail(&page->lru, &(page_hstate(page))->hugepage_activelist);
spin_unlock(&hugetlb_lock);
put_page(page);
diff --git v4.0-rc6.orig/mm/memory-failure.c v4.0-rc6/mm/memory-failure.c
index d487f8dc6d39..1d86cca8de26 100644
--- v4.0-rc6.orig/mm/memory-failure.c
+++ v4.0-rc6/mm/memory-failure.c
@@ -1540,8 +1540,18 @@ static int soft_offline_huge_page(struct page *page, int flags)
}
unlock_page(hpage);

- /* Keep page count to indicate a given hugepage is isolated. */
- list_move(&hpage->lru, &pagelist);
+ ret = isolate_huge_page(hpage, &pagelist);
+ if (ret) {
+ /*
+ * get_any_page() and isolate_huge_page() takes a refcount each,
+ * so need to drop one here.
+ */
+ put_page(hpage);
+ } else {
+ pr_info("soft offline: %#lx hugepage failed to isolate\n", pfn);
+ return -EBUSY;
+ }
+
ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL,
MIGRATE_SYNC, MR_MEMORY_FAILURE);
if (ret) {
--
1.9.3
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/