[PATCH] mm,hwpoison,hugetlb: defer dissolve hwpoison hugepage when allocating vmemmap failed

From: luofei
Date: Tue Jul 05 2022 - 02:52:30 EST


When dissolving hwpoison hugepage, if the allocation of vmemmap page
failed, the faulty page should not be put back on the hugepage free
list, which will cause the faulty pages to be reused. It's better to
postpone the reexecution of dissolve operation.

Meanwhile when the page fault handling program calls
dissolve_free_huge_page() to dissolve the faulty page, the basic page
fault processing operation(such as migration pages and unmap etc)
has actually completed. There is no need to return -ENOMEM error code
to the upper layer for temporarily vmemmap page allocation failure,
which will cause the caller to make a wrong judgment. So just defer
dissolve and return success.

Signed-off-by: luofei <luofei@xxxxxxxxxxxx>
---
mm/hugetlb.c | 34 +++++++++++++++++++++-------------
1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ca081078e814..db25458eb0a5 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -90,6 +90,9 @@ struct mutex *hugetlb_fault_mutex_table ____cacheline_aligned_in_smp;

/* Forward declaration */
static int hugetlb_acct_memory(struct hstate *h, long delta);
+static LLIST_HEAD(hpage_freelist);
+static void free_hpage_workfn(struct work_struct *work);
+static DECLARE_DELAYED_WORK(free_hpage_work, free_hpage_workfn);

static inline bool subpool_is_free(struct hugepage_subpool *spool)
{
@@ -1535,15 +1538,21 @@ static void __update_and_free_page(struct hstate *h, struct page *page)
if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported())
return;

- if (hugetlb_vmemmap_restore(h, page))
+ if (hugetlb_vmemmap_restore(h, page)) {
+ if (unlikely(PageHWPoison(page))) {
+ llist_add((struct llist_node *)&page->mapping, &hpage_freelist);
+ schedule_delayed_work(&free_hpage_work, HZ);
+ goto out;
+ }
goto fail;
+ }

/*
* Move PageHWPoison flag from head page to the raw error pages,
* which makes any healthy subpages reusable.
*/
if (unlikely(PageHWPoison(page) && hugetlb_clear_page_hwpoison(page)))
- goto fail;
+ goto out;

for (i = 0; i < pages_per_huge_page(h);
i++, subpage = mem_map_next(subpage, page, i)) {
@@ -1574,6 +1583,8 @@ static void __update_and_free_page(struct hstate *h, struct page *page)
*/
add_hugetlb_page(h, page, true);
spin_unlock_irq(&hugetlb_lock);
+out:
+ return;
}

/*
@@ -1587,8 +1598,6 @@ static void __update_and_free_page(struct hstate *h, struct page *page)
* to be cleared in free_hpage_workfn() anyway, it is reused as the llist_node
* structure of a lockless linked list of huge pages to be freed.
*/
-static LLIST_HEAD(hpage_freelist);
-
static void free_hpage_workfn(struct work_struct *work)
{
struct llist_node *node;
@@ -1616,12 +1625,11 @@ static void free_hpage_workfn(struct work_struct *work)
cond_resched();
}
}
-static DECLARE_WORK(free_hpage_work, free_hpage_workfn);

static inline void flush_free_hpage_work(struct hstate *h)
{
if (hugetlb_vmemmap_optimizable(h))
- flush_work(&free_hpage_work);
+ flush_delayed_work(&free_hpage_work);
}

static void update_and_free_page(struct hstate *h, struct page *page,
@@ -1634,13 +1642,9 @@ static void update_and_free_page(struct hstate *h, struct page *page,

/*
* Defer freeing to avoid using GFP_ATOMIC to allocate vmemmap pages.
- *
- * Only call schedule_work() if hpage_freelist is previously
- * empty. Otherwise, schedule_work() had been called but the workfn
- * hasn't retrieved the list yet.
*/
- if (llist_add((struct llist_node *)&page->mapping, &hpage_freelist))
- schedule_work(&free_hpage_work);
+ llist_add((struct llist_node *)&page->mapping, &hpage_freelist);
+ schedule_delayed_work(&free_hpage_work, 0);
}

static void update_and_free_pages_bulk(struct hstate *h, struct list_head *list)
@@ -2118,11 +2122,15 @@ int dissolve_free_huge_page(struct page *page)
rc = hugetlb_vmemmap_restore(h, head);
if (!rc) {
update_and_free_page(h, head, false);
- } else {
+ } else if (!PageHWPoison(head)) {
spin_lock_irq(&hugetlb_lock);
add_hugetlb_page(h, head, false);
h->max_huge_pages++;
spin_unlock_irq(&hugetlb_lock);
+ } else {
+ llist_add((struct llist_node *)&head->mapping, &hpage_freelist);
+ schedule_delayed_work(&free_hpage_work, HZ);
+ rc = 0;
}

return rc;
--
2.27.0