[PATCH RFC 6/8] skbuff: always try to do page pool frag reference counting

From: Yunsheng Lin
Date: Mon Nov 13 2023 - 08:01:08 EST


As we have unified the frag_count handling in [1], we can
do frag reference counting instead of native page reference
counting whenever possible in net stack, in order to enable
the best possibility of recycling as we assume other
subsystem other than page pool is also hodling on to a page
if page_ref_count(page) != 1, so we stop the recycling and
release the page from page pool.

As the page from the devmem, we reuse most of the fields in
'struct page' for devmem in order to have unified handling
for both normal memory and devmem, but the meta data is not
really tied to mm, so we can't really use page->_refcount
as reference counting for devmem, instead it relies on page
pool's frag reference counting.

After this patch, pp_frag_count is not appropriate name as
we don't ensure only one user holding on to a frag, we may
rename it to frag_refcount in the future to avoid confusion.

1. https://lore.kernel.org/all/20231020095952.11055-1-linyunsheng@xxxxxxxxxx/

Signed-off-by: Yunsheng Lin <linyunsheng@xxxxxxxxxx>
---
.../chelsio/inline_crypto/ch_ktls/chcr_ktls.c | 2 +-
drivers/net/ethernet/sun/cassini.c | 4 ++--
drivers/net/veth.c | 2 +-
include/linux/skbuff.h | 12 +++++++++---
include/net/page_pool/types.h | 13 +++++++++++++
net/core/skbuff.c | 6 +++---
net/tls/tls_device_fallback.c | 2 +-
7 files changed, 30 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c
index 6482728794dd..87fda3e9d7d1 100644
--- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c
+++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c
@@ -1658,7 +1658,7 @@ static void chcr_ktls_copy_record_in_skb(struct sk_buff *nskb,
for (i = 0; i < record->num_frags; i++) {
skb_shinfo(nskb)->frags[i] = record->frags[i];
/* increase the frag ref count */
- __skb_frag_ref(&skb_shinfo(nskb)->frags[i]);
+ skb_frag_ref(nskb, i);
}

skb_shinfo(nskb)->nr_frags = record->num_frags;
diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c
index b317b9486455..8b818809c721 100644
--- a/drivers/net/ethernet/sun/cassini.c
+++ b/drivers/net/ethernet/sun/cassini.c
@@ -1999,7 +1999,7 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc,
skb->len += hlen - swivel;

skb_frag_fill_page_desc(frag, page->buffer, off, hlen - swivel);
- __skb_frag_ref(frag);
+ __skb_frag_ref(frag, skb->pp_recycle);

/* any more data? */
if ((words[0] & RX_COMP1_SPLIT_PKT) && ((dlen -= hlen) > 0)) {
@@ -2023,7 +2023,7 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc,
frag++;

skb_frag_fill_page_desc(frag, page->buffer, 0, hlen);
- __skb_frag_ref(frag);
+ __skb_frag_ref(frag, skb->pp_recycle);
RX_USED_ADD(page, hlen + cp->crc_size);
}

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 9980517ed8b0..8ee323a1184b 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -724,7 +724,7 @@ static void veth_xdp_get(struct xdp_buff *xdp)
return;

for (i = 0; i < sinfo->nr_frags; i++)
- __skb_frag_ref(&sinfo->frags[i]);
+ __skb_frag_ref(&sinfo->frags[i], false);
}

static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 1889b0968be0..b77043a2b446 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -37,6 +37,7 @@
#endif
#include <net/net_debug.h>
#include <net/dropreason-core.h>
+#include <net/page_pool/types.h>

/**
* DOC: skb checksums
@@ -3429,9 +3430,14 @@ static inline struct page *skb_frag_page(const skb_frag_t *frag)
*
* Takes an additional reference on the paged fragment @frag.
*/
-static inline void __skb_frag_ref(skb_frag_t *frag)
+static inline void __skb_frag_ref(skb_frag_t *frag, bool recycle)
{
- page_ref_inc(skb_frag_page(frag));
+ struct page *page = skb_frag_page(frag);
+
+ if (page_pool_frag_ref(page, recycle))
+ return;
+
+ page_ref_inc(page);
}

/**
@@ -3443,7 +3449,7 @@ static inline void __skb_frag_ref(skb_frag_t *frag)
*/
static inline void skb_frag_ref(struct sk_buff *skb, int f)
{
- __skb_frag_ref(&skb_shinfo(skb)->frags[f]);
+ __skb_frag_ref(&skb_shinfo(skb)->frags[f], skb->pp_recycle);
}

bool napi_pp_put_page(struct page *page, bool napi_safe);
diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
index 52e4cf98ebc6..ea4e654168bd 100644
--- a/include/net/page_pool/types.h
+++ b/include/net/page_pool/types.h
@@ -275,6 +275,19 @@ static inline bool is_page_pool_compiled_in(void)
#endif
}

+static inline bool page_pool_frag_ref(struct page *page, bool recycle)
+{
+ if (recycle && (page->pp_magic & ~0x3UL) == PP_SIGNATURE) {
+ atomic_long_inc(&page->pp_frag_count);
+ return true;
+ }
+
+ BUG_ON((page->pp_magic & ~0x3UL) == PP_SIGNATURE &&
+ page->pp->mp_ops);
+
+ return false;
+}
+
/* Caller must provide appropriate safe context, e.g. NAPI. */
void page_pool_update_nid(struct page_pool *pool, int new_nid);

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index ada3da4fe221..a33df6e49694 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4046,7 +4046,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
to++;

} else {
- __skb_frag_ref(fragfrom);
+ __skb_frag_ref(fragfrom, skb->pp_recycle);
skb_frag_page_copy(fragto, fragfrom);
skb_frag_off_copy(fragto, fragfrom);
skb_frag_size_set(fragto, todo);
@@ -4695,7 +4695,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
}

*nskb_frag = (i < 0) ? skb_head_frag_to_page_desc(frag_skb) : *frag;
- __skb_frag_ref(nskb_frag);
+ __skb_frag_ref(nskb_frag, nskb->pp_recycle);
size = skb_frag_size(nskb_frag);

if (pos < offset) {
@@ -5830,7 +5830,7 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
* since we set nr_frags to 0.
*/
for (i = 0; i < from_shinfo->nr_frags; i++)
- __skb_frag_ref(&from_shinfo->frags[i]);
+ __skb_frag_ref(&from_shinfo->frags[i], from->pp_recycle);

to->truesize += delta;
to->len += len;
diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c
index 4e7228f275fa..d4000b4a1f7d 100644
--- a/net/tls/tls_device_fallback.c
+++ b/net/tls/tls_device_fallback.c
@@ -277,7 +277,7 @@ static int fill_sg_in(struct scatterlist *sg_in,
for (i = 0; remaining > 0; i++) {
skb_frag_t *frag = &record->frags[i];

- __skb_frag_ref(frag);
+ __skb_frag_ref(frag, false);
sg_set_page(sg_in + i, skb_frag_page(frag),
skb_frag_size(frag), skb_frag_off(frag));

--
2.33.0