[PATCH] net: page_pool: fix refcounting issues with fragmented allocation

From: Felix Fietkau
Date: Tue Jan 24 2023 - 07:43:18 EST


While testing fragmented page_pool allocation in the mt76 driver, I was able
to reliably trigger page refcount underflow issues, which did not occur with
full-page page_pool allocation.
It appears to me, that handling refcounting in two separate counters
(page->pp_frag_count and page refcount) is racy when page refcount gets
incremented by code dealing with skb fragments directly, and
page_pool_return_skb_page is called multiple times for the same fragment.

Dropping page->pp_frag_count and relying entirely on the page refcount makes
these underflow issues and crashes go away.

Cc: Lorenzo Bianconi <lorenzo@xxxxxxxxxx>
Signed-off-by: Felix Fietkau <nbd@xxxxxxxx>
---
include/linux/mm_types.h | 17 +++++------------
include/net/page_pool.h | 19 ++++---------------
net/core/page_pool.c | 12 ++++--------
3 files changed, 13 insertions(+), 35 deletions(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 9757067c3053..96ec3b19a86d 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -125,18 +125,11 @@ struct page {
struct page_pool *pp;
unsigned long _pp_mapping_pad;
unsigned long dma_addr;
- union {
- /**
- * dma_addr_upper: might require a 64-bit
- * value on 32-bit architectures.
- */
- unsigned long dma_addr_upper;
- /**
- * For frag page support, not supported in
- * 32-bit architectures with 64-bit DMA.
- */
- atomic_long_t pp_frag_count;
- };
+ /**
+ * dma_addr_upper: might require a 64-bit
+ * value on 32-bit architectures.
+ */
+ unsigned long dma_addr_upper;
};
struct { /* Tail pages of compound page */
unsigned long compound_head; /* Bit zero is set */
diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index 813c93499f20..28e1fdbdcd53 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -279,14 +279,14 @@ void page_pool_put_defragged_page(struct page_pool *pool, struct page *page,

static inline void page_pool_fragment_page(struct page *page, long nr)
{
- atomic_long_set(&page->pp_frag_count, nr);
+ page_ref_add(page, nr);
}

static inline long page_pool_defrag_page(struct page *page, long nr)
{
long ret;

- /* If nr == pp_frag_count then we have cleared all remaining
+ /* If nr == page_ref_count then we have cleared all remaining
* references to the page. No need to actually overwrite it, instead
* we can leave this to be overwritten by the calling function.
*
@@ -295,22 +295,14 @@ static inline long page_pool_defrag_page(struct page *page, long nr)
* especially when dealing with a page that may be partitioned
* into only 2 or 3 pieces.
*/
- if (atomic_long_read(&page->pp_frag_count) == nr)
+ if (page_ref_count(page) == nr)
return 0;

- ret = atomic_long_sub_return(nr, &page->pp_frag_count);
+ ret = page_ref_sub_return(page, nr);
WARN_ON(ret < 0);
return ret;
}

-static inline bool page_pool_is_last_frag(struct page_pool *pool,
- struct page *page)
-{
- /* If fragments aren't enabled or count is 0 we were the last user */
- return !(pool->p.flags & PP_FLAG_PAGE_FRAG) ||
- (page_pool_defrag_page(page, 1) == 0);
-}
-
static inline void page_pool_put_page(struct page_pool *pool,
struct page *page,
unsigned int dma_sync_size,
@@ -320,9 +312,6 @@ static inline void page_pool_put_page(struct page_pool *pool,
* allow registering MEM_TYPE_PAGE_POOL, but shield linker.
*/
#ifdef CONFIG_PAGE_POOL
- if (!page_pool_is_last_frag(pool, page))
- return;
-
page_pool_put_defragged_page(pool, page, dma_sync_size, allow_direct);
#endif
}
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 9b203d8660e4..0defcadae225 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -25,7 +25,7 @@
#define DEFER_TIME (msecs_to_jiffies(1000))
#define DEFER_WARN_INTERVAL (60 * HZ)

-#define BIAS_MAX LONG_MAX
+#define BIAS_MAX(pool) (PAGE_SIZE << ((pool)->p.order))

#ifdef CONFIG_PAGE_POOL_STATS
/* alloc_stat_inc is intended to be used in softirq context */
@@ -619,10 +619,6 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
for (i = 0; i < count; i++) {
struct page *page = virt_to_head_page(data[i]);

- /* It is not the last user for the page frag case */
- if (!page_pool_is_last_frag(pool, page))
- continue;
-
page = __page_pool_put_page(pool, page, -1, false);
/* Approved for bulk recycling in ptr_ring cache */
if (page)
@@ -659,7 +655,7 @@ EXPORT_SYMBOL(page_pool_put_page_bulk);
static struct page *page_pool_drain_frag(struct page_pool *pool,
struct page *page)
{
- long drain_count = BIAS_MAX - pool->frag_users;
+ long drain_count = BIAS_MAX(pool) - pool->frag_users;

/* Some user is still using the page frag */
if (likely(page_pool_defrag_page(page, drain_count)))
@@ -678,7 +674,7 @@ static struct page *page_pool_drain_frag(struct page_pool *pool,

static void page_pool_free_frag(struct page_pool *pool)
{
- long drain_count = BIAS_MAX - pool->frag_users;
+ long drain_count = BIAS_MAX(pool) - pool->frag_users;
struct page *page = pool->frag_page;

pool->frag_page = NULL;
@@ -724,7 +720,7 @@ struct page *page_pool_alloc_frag(struct page_pool *pool,
pool->frag_users = 1;
*offset = 0;
pool->frag_offset = size;
- page_pool_fragment_page(page, BIAS_MAX);
+ page_pool_fragment_page(page, BIAS_MAX(pool));
return page;
}

--
2.39.0