[PATCH v6 34/34] net: [RFC][WIP] Make __zerocopy_sg_from_iter() correctly pin or leave pages unref'd

From: David Howells
Date: Mon Jan 16 2023 - 18:23:02 EST


Make __zerocopy_sg_from_iter() call iov_iter_extract_pages() to get pages
that have been ref'd, pinned or left alone as appropriate. As this function
is only used for source buffers, pinning isn't an option, but leaving the
pages entirely unref'd is.
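
For illustration only (not part of the patch), the extract-and-release
pattern a caller of the new interface follows is roughly the sketch below.
It reuses the calls from the datagram.c hunk further down; example_extract(),
the eight-page array and the ARRAY_SIZE()/DIV_ROUND_UP() bookkeeping are just
for the example:

        /* Sketch: extract pages from a source iterator, use them, then
         * release them according to the iterator's cleanup mode (drop a
         * ref, unpin, or leave untouched).
         */
        static ssize_t example_extract(struct iov_iter *from, size_t len)
        {
                struct page *pages[8], **ppages = pages;
                unsigned int cleanup_mode = iov_iter_extract_mode(from, FOLL_SOURCE_BUF);
                size_t off;
                ssize_t got;
                int i, n;

                got = iov_iter_extract_pages(from, &ppages, len, ARRAY_SIZE(pages),
                                             FOLL_SOURCE_BUF, &off);
                if (got < 0)
                        return got;

                /* ... hand the pages to the device / copy from them ... */

                n = DIV_ROUND_UP(got + off, PAGE_SIZE);
                for (i = 0; i < n; i++)
                        page_put_unpin(pages[i], cleanup_mode);
                return got;
        }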

The way __zerocopy_sg_from_iter() merges fragments is also altered: a page
range is only merged into the preceding fragment if their cleanup modes match.
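
The reason the modes have to match is that a merged fragment is released as a
single unit with a single cleanup mode. Roughly speaking, whatever tears the
skb down ends up doing something like the sketch below; the actual release
path is not part of this patch and example_release_frag() is only an
illustration:

        /* Sketch: one fragment records one cleanup mode, so every page
         * range merged into it must want the same kind of release.
         */
        static void example_release_frag(skb_frag_t *frag)
        {
                page_put_unpin(skb_frag_page(frag), skb_frag_cleanup(frag));
        }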

An extra helper and wrapper, folio_put_unpin_sub() and page_put_unpin_sub(),
are added to allow multiple refs or pins to be released in one call.
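
They behave like folio_put_unpin()/page_put_unpin(), but drop @refs refs or
pins at once, which is what the batching over compound pages in the datagram
code needs. For instance (illustrative only; example_drop_batch() is made up):

        /* Sketch: release 'refs' accumulated refs-or-pins on the compound
         * page 'head' in one call rather than one at a time.
         */
        static void example_drop_batch(struct page *head, unsigned int cleanup_mode,
                                       unsigned int refs)
        {
                if (refs)
                        page_put_unpin_sub(head, cleanup_mode, refs);
        }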

Signed-off-by: David Howells <dhowells@xxxxxxxxxx>
cc: "David S. Miller" <davem@xxxxxxxxxxxxx>
cc: Eric Dumazet <edumazet@xxxxxxxxxx>
cc: Jakub Kicinski <kuba@xxxxxxxxxx>
cc: Paolo Abeni <pabeni@xxxxxxxxxx>
cc: netdev@xxxxxxxxxxxxxxx
---

 include/linux/mm.h  |  2 ++
 mm/gup.c            | 25 +++++++++++++++++++++++++
 net/core/datagram.c | 23 +++++++++++++----------
 3 files changed, 40 insertions(+), 10 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index f14edb192394..e3923b89c75e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1368,7 +1368,9 @@ static inline bool is_cow_mapping(vm_flags_t flags)
 #endif

 void folio_put_unpin(struct folio *folio, unsigned int flags);
+void folio_put_unpin_sub(struct folio *folio, unsigned int flags, unsigned int refs);
 void page_put_unpin(struct page *page, unsigned int flags);
+void page_put_unpin_sub(struct page *page, unsigned int flags, unsigned int refs);

 /*
  * The identification function is mainly used by the buddy allocator for
diff --git a/mm/gup.c b/mm/gup.c
index 3ee4b4c7e0cb..49dd27ba6c13 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -213,6 +213,31 @@ void page_put_unpin(struct page *page, unsigned int flags)
 }
 EXPORT_SYMBOL_GPL(page_put_unpin);

+/**
+ * folio_put_unpin_sub - Unpin/put a folio as appropriate
+ * @folio: The folio to release
+ * @flags: gup flags indicating the mode of release (FOLL_*)
+ * @refs: Number of refs/pins to drop
+ *
+ * Release a folio according to the flags. If FOLL_GET is set, the folio has a
+ * ref dropped; if FOLL_PIN is set, it is unpinned; otherwise it is left
+ * unaltered.
+ */
+void folio_put_unpin_sub(struct folio *folio, unsigned int flags,
+                         unsigned int refs)
+{
+        if (flags & (FOLL_GET | FOLL_PIN))
+                gup_put_folio(folio, refs, flags);
+}
+EXPORT_SYMBOL_GPL(folio_put_unpin_sub);
+
+void page_put_unpin_sub(struct page *page, unsigned int flags,
+                        unsigned int refs)
+{
+        folio_put_unpin_sub(page_folio(page), flags, refs);
+}
+EXPORT_SYMBOL_GPL(page_put_unpin_sub);
+
 /**
  * try_grab_page() - elevate a page's refcount by a flag-dependent amount
  * @page:    pointer to page to be grabbed
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 122bfb144d32..63ea1f8817e0 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -614,6 +614,7 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
                             struct sk_buff *skb, struct iov_iter *from,
                             size_t length)
 {
+        unsigned int cleanup_mode = iov_iter_extract_mode(from, FOLL_SOURCE_BUF);
         int frag;

         if (msg && msg->msg_ubuf && msg->sg_from_iter)
@@ -622,7 +623,7 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
         frag = skb_shinfo(skb)->nr_frags;

         while (length && iov_iter_count(from)) {
-                struct page *pages[MAX_SKB_FRAGS];
+                struct page *pages[MAX_SKB_FRAGS], **ppages = pages;
                 struct page *last_head = NULL;
                 size_t start;
                 ssize_t copied;
@@ -632,9 +633,9 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
                 if (frag == MAX_SKB_FRAGS)
                         return -EMSGSIZE;

-                copied = iov_iter_get_pages(from, pages, length,
-                                            MAX_SKB_FRAGS - frag, &start,
-                                            FOLL_SOURCE_BUF);
+                copied = iov_iter_extract_pages(from, &ppages, length,
+                                                MAX_SKB_FRAGS - frag,
+                                                FOLL_SOURCE_BUF, &start);
                 if (copied < 0)
                         return -EFAULT;
 
@@ -662,12 +663,14 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
                                 skb_frag_t *last = &skb_shinfo(skb)->frags[frag - 1];

                                 if (head == skb_frag_page(last) &&
+                                    cleanup_mode == skb_frag_cleanup(last) &&
                                     start == skb_frag_off(last) + skb_frag_size(last)) {
                                         skb_frag_size_add(last, size);
                                         /* We combined this page, we need to release
-                                         * a reference. Since compound pages refcount
-                                         * is shared among many pages, batch the refcount
-                                         * adjustments to limit false sharing.
+                                         * a reference or a pin. Since compound pages
+                                         * refcount is shared among many pages, batch
+                                         * the refcount adjustments to limit false
+                                         * sharing.
                                          */
                                         last_head = head;
                                         refs++;
@@ -675,14 +678,14 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
                                 }
                         }
                         if (refs) {
-                                page_ref_sub(last_head, refs);
+                                page_put_unpin_sub(last_head, cleanup_mode, refs);
                                 refs = 0;
                         }
                         skb_fill_page_desc_noacc(skb, frag++, head, start, size,
-                                                 FOLL_GET);
+                                                 cleanup_mode);
                 }
                 if (refs)
-                        page_ref_sub(last_head, refs);
+                        page_put_unpin_sub(last_head, cleanup_mode, refs);
         }
         return 0;
 }