Re: Memory hotplug softlock issue

From: Baoquan He
Date: Fri Nov 16 2018 - 23:22:18 EST


On 11/16/18 at 10:14am, Michal Hocko wrote:
> Could you try to apply this debugging patch on top please? It will dump
> stack trace for each reference count elevation for one page that fails
> to migrate after multiple passes.

Thanks, I applied it and fixed two code issues (diff below): the declaration in
page_ref.h has to be extern, otherwise every file including the header defines
its own page_to_track, and the symbol needs to be exported so the inlined
page_ref helpers used by modules still link. The dmesg has been sent to you
privately, please check. The dmesg ring buffer overflowed; if you need the
earlier messages, I will retest.

diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index b64ebf253381..f76e2c498f31 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -72,7 +72,7 @@ static inline int page_count(struct page *page)
 	return atomic_read(&compound_head(page)->_refcount);
 }
 
-struct page *page_to_track;
+extern struct page *page_to_track;
 static inline void set_page_count(struct page *page, int v)
 {
 	atomic_set(&page->_refcount, v);
diff --git a/mm/migrate.c b/mm/migrate.c
index 9b2e395a3d68..42c7499c43b9 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1339,6 +1339,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
 }
 
 struct page *page_to_track;
+EXPORT_SYMBOL_GPL(page_to_track);
 
 /*
  * migrate_pages - migrate the pages specified in a list, to the free pages

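For reference, get_page() and friends funnel through these page_ref_* helpers,
so once page_to_track is set, every refcount elevation on the stuck page should
leave a stack trace in dmesg. Below is a rough userspace sketch of the same
tracking idea, not kernel code: backtrace()/backtrace_symbols_fd() stand in for
dump_stack(), and all of the names (struct object, object_to_track, ref_inc)
are made up for illustration.

/*
 * Userspace model of the debugging trick: single out one object and
 * print a backtrace every time its refcount is elevated.
 */
#include <execinfo.h>
#include <stdatomic.h>
#include <unistd.h>

struct object {
	atomic_int refcount;
};

static struct object *object_to_track;	/* plays the role of page_to_track */

static void dump_stack(void)
{
	void *frames[32];
	int n = backtrace(frames, 32);

	backtrace_symbols_fd(frames, n, STDERR_FILENO);
}

static void ref_inc(struct object *obj)	/* plays the role of page_ref_inc() */
{
	atomic_fetch_add(&obj->refcount, 1);
	if (obj == object_to_track)
		dump_stack();		/* who elevated the refcount? */
}

int main(void)
{
	struct object obj = { .refcount = 1 };

	object_to_track = &obj;		/* like the pass > 1 selection above */
	ref_inc(&obj);			/* prints a backtrace to stderr */
	return 0;
}

Building with something like gcc -rdynamic lets backtrace_symbols_fd() resolve
the function names in the printed frames.
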
>
> diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
> index 14d14beb1f7f..b64ebf253381 100644
> --- a/include/linux/page_ref.h
> +++ b/include/linux/page_ref.h
> @@ -72,9 +72,12 @@ static inline int page_count(struct page *page)
> return atomic_read(&compound_head(page)->_refcount);
> }
>
> +struct page *page_to_track;
> static inline void set_page_count(struct page *page, int v)
> {
> atomic_set(&page->_refcount, v);
> + if (page == page_to_track)
> + dump_stack();
> if (page_ref_tracepoint_active(__tracepoint_page_ref_set))
> __page_ref_set(page, v);
> }
> @@ -91,6 +94,8 @@ static inline void init_page_count(struct page *page)
> static inline void page_ref_add(struct page *page, int nr)
> {
> atomic_add(nr, &page->_refcount);
> + if (page == page_to_track)
> + dump_stack();
> if (page_ref_tracepoint_active(__tracepoint_page_ref_mod))
> __page_ref_mod(page, nr);
> }
> @@ -105,6 +110,8 @@ static inline void page_ref_sub(struct page *page, int nr)
> static inline void page_ref_inc(struct page *page)
> {
> atomic_inc(&page->_refcount);
> + if (page == page_to_track)
> + dump_stack();
> if (page_ref_tracepoint_active(__tracepoint_page_ref_mod))
> __page_ref_mod(page, 1);
> }
> @@ -129,6 +136,8 @@ static inline int page_ref_inc_return(struct page *page)
> {
> int ret = atomic_inc_return(&page->_refcount);
>
> + if (page == page_to_track)
> + dump_stack();
> if (page_ref_tracepoint_active(__tracepoint_page_ref_mod_and_return))
> __page_ref_mod_and_return(page, 1, ret);
> return ret;
> @@ -156,6 +165,8 @@ static inline int page_ref_add_unless(struct page *page, int nr, int u)
> {
> int ret = atomic_add_unless(&page->_refcount, nr, u);
>
> + if (page == page_to_track)
> + dump_stack();
> if (page_ref_tracepoint_active(__tracepoint_page_ref_mod_unless))
> __page_ref_mod_unless(page, nr, ret);
> return ret;
> diff --git a/mm/migrate.c b/mm/migrate.c
> index f7e4bfdc13b7..9b2e395a3d68 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -1338,6 +1338,8 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
> return rc;
> }
>
> +struct page *page_to_track;
> +
> /*
> * migrate_pages - migrate the pages specified in a list, to the free pages
> * supplied as the target for the page migration
> @@ -1375,6 +1377,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
> if (!swapwrite)
> current->flags |= PF_SWAPWRITE;
>
> + page_to_track = NULL;
> for(pass = 0; pass < 10 && retry; pass++) {
> retry = 0;
>
> @@ -1417,6 +1420,8 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
> goto out;
> case -EAGAIN:
> retry++;
> + if (pass > 1 && !page_to_track)
> + page_to_track = page;
> break;
> case MIGRATEPAGE_SUCCESS:
> nr_succeeded++;
> --
> Michal Hocko
> SUSE Labs