[PATCH net-next v4 26/27] io_uring: enable managed frags with register buffers

From: Pavel Begunkov
Date: Thu Jul 07 2022 - 07:53:41 EST


io_uring's registered buffers infra has a good performant way of pinning
pages, so let's use SKBFL_MANAGED_FRAG_REFS when our requests are purely
register buffer backed.

Signed-off-by: Pavel Begunkov <asml.silence@xxxxxxxxx>
---
io_uring/net.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 55 insertions(+), 1 deletion(-)

diff --git a/io_uring/net.c b/io_uring/net.c
index bf9916d5e50c..a4e863dce7ec 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -704,6 +704,60 @@ int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return 0;
}

+static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb,
+ struct iov_iter *from, size_t length)
+{
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+ int frag = shinfo->nr_frags;
+ int ret = 0;
+ struct bvec_iter bi;
+ ssize_t copied = 0;
+ unsigned long truesize = 0;
+
+ if (!shinfo->nr_frags)
+ shinfo->flags |= SKBFL_MANAGED_FRAG_REFS;
+
+ if (!skb_zcopy_managed(skb) || !iov_iter_is_bvec(from)) {
+ skb_zcopy_downgrade_managed(skb);
+ return __zerocopy_sg_from_iter(NULL, sk, skb, from, length);
+ }
+
+ bi.bi_size = min(from->count, length);
+ bi.bi_bvec_done = from->iov_offset;
+ bi.bi_idx = 0;
+
+ while (bi.bi_size && frag < MAX_SKB_FRAGS) {
+ struct bio_vec v = mp_bvec_iter_bvec(from->bvec, bi);
+
+ copied += v.bv_len;
+ truesize += PAGE_ALIGN(v.bv_len + v.bv_offset);
+ __skb_fill_page_desc_noacc(shinfo, frag++, v.bv_page,
+ v.bv_offset, v.bv_len);
+ bvec_iter_advance_single(from->bvec, &bi, v.bv_len);
+ }
+ if (bi.bi_size)
+ ret = -EMSGSIZE;
+
+ shinfo->nr_frags = frag;
+ from->bvec += bi.bi_idx;
+ from->nr_segs -= bi.bi_idx;
+ from->count = bi.bi_size;
+ from->iov_offset = bi.bi_bvec_done;
+
+ skb->data_len += copied;
+ skb->len += copied;
+ skb->truesize += truesize;
+
+ if (sk && sk->sk_type == SOCK_STREAM) {
+ sk_wmem_queued_add(sk, truesize);
+ if (!skb_zcopy_pure(skb))
+ sk_mem_charge(sk, truesize);
+ } else {
+ refcount_add(truesize, &skb->sk->sk_wmem_alloc);
+ }
+ return ret;
+}
+
int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
{
struct sockaddr_storage address;
@@ -768,7 +822,7 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)

msg.msg_flags = msg_flags;
msg.msg_ubuf = &notif->uarg;
- msg.sg_from_iter = NULL;
+ msg.sg_from_iter = io_sg_from_iter;
ret = sock_sendmsg(sock, &msg);

if (unlikely(ret < min_ret)) {
--
2.36.1