Re: [syzbot] [mm?] WARNING: refcount bug in __reset_page_owner

From: Tetsuo Handa
Date: Wed Mar 20 2024 - 04:07:06 EST


#syz test: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master

diff --git a/mm/page_owner.c b/mm/page_owner.c
index e7139952ffd9..58fc7b451f75 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -27,6 +27,7 @@ struct page_owner {
gfp_t gfp_mask;
depot_stack_handle_t handle;
depot_stack_handle_t free_handle;
+ depot_stack_handle_t migrate_handle;
u64 ts_nsec;
u64 free_ts_nsec;
char comm[TASK_COMM_LEN];
@@ -183,9 +184,11 @@ static void add_stack_record_to_list(struct stack_record *stack_record,
spin_unlock_irqrestore(&stack_list_lock, flags);
}

-static void inc_stack_record_count(depot_stack_handle_t handle, gfp_t gfp_mask)
+static void inc_stack_record_count(depot_stack_handle_t handle, gfp_t gfp_mask,
+ int nr_base_pages)
{
struct stack_record *stack_record = __stack_depot_get_stack_record(handle);
+ int old = REFCOUNT_SATURATED;

if (!stack_record)
return;
@@ -197,22 +200,21 @@ static void inc_stack_record_count(depot_stack_handle_t handle, gfp_t gfp_mask)
* Since we do not use STACK_DEPOT_FLAG_GET API, let us
* set a refcount of 1 ourselves.
*/
- if (refcount_read(&stack_record->count) == REFCOUNT_SATURATED) {
- int old = REFCOUNT_SATURATED;
-
- if (atomic_try_cmpxchg_relaxed(&stack_record->count.refs, &old, 1))
- /* Add the new stack_record to our list */
- add_stack_record_to_list(stack_record, gfp_mask);
- }
- refcount_inc(&stack_record->count);
+ if (atomic_try_cmpxchg_relaxed(&stack_record->count.refs, &old, 1))
+ add_stack_record_to_list(stack_record, gfp_mask);
+ refcount_add(nr_base_pages, &stack_record->count);
}

-static void dec_stack_record_count(depot_stack_handle_t handle)
+static void dec_stack_record_count(depot_stack_handle_t handle,
+ int nr_base_pages)
{
struct stack_record *stack_record = __stack_depot_get_stack_record(handle);

- if (stack_record)
- refcount_dec(&stack_record->count);
+ if (!stack_record)
+ return;
+
+ if (refcount_sub_and_test(nr_base_pages, &stack_record->count))
+ WARN(1, "%s refcount went to 0 for %u handle\n", __func__, handle);
}

void __reset_page_owner(struct page *page, unsigned short order)
@@ -229,7 +231,15 @@ void __reset_page_owner(struct page *page, unsigned short order)
return;

page_owner = get_page_owner(page_ext);
- alloc_handle = page_owner->handle;
+ /*
+ * If this page was allocated for migration purposes, its handle doesn't
+ * reference the stack it was allocated from, so make sure to use the
+ * migrate_handle in order to subtract it from the right stack.
+ */
+ if (!page_owner->migrate_handle)
+ alloc_handle = page_owner->handle;
+ else
+ alloc_handle = page_owner->migrate_handle;

handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
for (i = 0; i < (1 << order); i++) {
@@ -250,7 +260,7 @@ void __reset_page_owner(struct page *page, unsigned short order)
* the machinery is not ready yet, we cannot decrement
* their refcount either.
*/
- dec_stack_record_count(alloc_handle);
+ dec_stack_record_count(alloc_handle, 1 << order);
}

static inline void __set_page_owner_handle(struct page_ext *page_ext,
@@ -266,6 +276,7 @@ static inline void __set_page_owner_handle(struct page_ext *page_ext,
page_owner->handle = handle;
page_owner->order = order;
page_owner->gfp_mask = gfp_mask;
+ page_owner->migrate_handle = 0;
page_owner->last_migrate_reason = -1;
page_owner->pid = current->pid;
page_owner->tgid = current->tgid;
@@ -292,7 +303,7 @@ noinline void __set_page_owner(struct page *page, unsigned short order,
return;
__set_page_owner_handle(page_ext, handle, order, gfp_mask);
page_ext_put(page_ext);
- inc_stack_record_count(handle, gfp_mask);
+ inc_stack_record_count(handle, gfp_mask, 1 << order);
}

void __set_page_owner_migrate_reason(struct page *page, int reason)
@@ -347,6 +358,7 @@ void __folio_copy_owner(struct folio *newfolio, struct folio *old)
new_page_owner->gfp_mask = old_page_owner->gfp_mask;
new_page_owner->last_migrate_reason =
old_page_owner->last_migrate_reason;
+ new_page_owner->migrate_handle = new_page_owner->handle;
new_page_owner->handle = old_page_owner->handle;
new_page_owner->pid = old_page_owner->pid;
new_page_owner->tgid = old_page_owner->tgid;
@@ -848,11 +860,11 @@ static void *stack_next(struct seq_file *m, void *v, loff_t *ppos)
return stack;
}

-static unsigned long page_owner_stack_threshold;
+static unsigned long page_owner_pages_threshold;

static int stack_print(struct seq_file *m, void *v)
{
- int i, stack_count;
+ int i, nr_base_pages;
struct stack *stack = v;
unsigned long *entries;
unsigned long nr_entries;
@@ -863,14 +875,14 @@ static int stack_print(struct seq_file *m, void *v)

nr_entries = stack_record->size;
entries = stack_record->entries;
- stack_count = refcount_read(&stack_record->count) - 1;
+ nr_base_pages = refcount_read(&stack_record->count) - 1;

- if (stack_count < 1 || stack_count < page_owner_stack_threshold)
+ if (nr_base_pages < 1 || nr_base_pages < page_owner_pages_threshold)
return 0;

for (i = 0; i < nr_entries; i++)
seq_printf(m, " %pS\n", (void *)entries[i]);
- seq_printf(m, "stack_count: %d\n\n", stack_count);
+ seq_printf(m, "nr_base_pages: %d\n\n", nr_base_pages);

return 0;
}
@@ -900,13 +912,13 @@ static const struct file_operations page_owner_stack_operations = {

static int page_owner_threshold_get(void *data, u64 *val)
{
- *val = READ_ONCE(page_owner_stack_threshold);
+ *val = READ_ONCE(page_owner_pages_threshold);
return 0;
}

static int page_owner_threshold_set(void *data, u64 val)
{
- WRITE_ONCE(page_owner_stack_threshold, val);
+ WRITE_ONCE(page_owner_pages_threshold, val);
return 0;
}