[PATCH 2.5.21] Make NFS/RPC client use the TCP zero copy API when hardware supports it

From: Trond Myklebust (trond.myklebust@fys.uio.no)
Date: Sun Jun 16 2002 - 15:59:02 EST


Does anybody with 'zero copy' compatible networking cards (3c59x,
AceNIC, Tigon3, E1000, ....) notice any performance difference when
using NFS TCP mounts with/without this patch?

Cheers,
  Trond

diff -u --recursive --new-file linux-2.5.21/net/sunrpc/xprt.c linux-nfs_zerocopy/net/sunrpc/xprt.c
--- linux-2.5.21/net/sunrpc/xprt.c Fri May 24 13:32:02 2002
+++ linux-nfs_zerocopy/net/sunrpc/xprt.c Sun Jun 16 22:16:22 2002
@@ -67,6 +67,7 @@
 #include <net/tcp.h>
 
 #include <asm/uaccess.h>
+#include <linux/pagemap.h>
 
 extern spinlock_t rpc_queue_lock;
 
@@ -168,6 +169,121 @@
         spin_unlock_bh(&xprt->sock_lock);
 }
 
+/* Write an iovec array to a socket: wraps iov[0..count) in a msghdr and hands it to sock_sendmsg(); returns sock_sendmsg()'s result unchanged */
+static int
+sock_sendkerneliovec(struct socket *sock, struct sockaddr *addr, int addrlen,
+ struct iovec *iov, size_t count, int size) /* 'size' is passed through as sock_sendmsg()'s length argument */
+{
+ struct msghdr msg = {
+ msg_name: addr, /* destination address; xdr_sendpages() passes NULL/0 here */
+ msg_namelen: addrlen,
+ msg_iov: iov,
+ msg_iovlen: count, /* number of iovec entries, not a byte count */
+ msg_control: NULL, /* no ancillary data */
+ msg_controllen: 0,
+ msg_flags: MSG_DONTWAIT|MSG_NOSIGNAL,
+ };
+ mm_segment_t oldfs; /* fs limit in force on entry, restored before returning */
+ int ret;
+
+ oldfs = get_fs(); set_fs(get_ds()); /* presumably needed because iov points at kernel memory - TODO confirm */
+ ret = sock_sendmsg(sock, &msg, size);
+ set_fs(oldfs);
+ return ret;
+}
+
+static int /* Set the TCP_CORK option on 'sock' to 'val'; returns the setsockopt() result */
+xprt_set_cork_sock(struct socket *sock, int val)
+{
+ mm_segment_t oldfs; /* fs limit in force on entry, restored before returning */
+ int ret;
+
+ oldfs = get_fs(); set_fs(get_ds()); /* presumably needed because &val is a kernel stack address - TODO confirm */
+ ret = sock->ops->setsockopt(sock, SOL_TCP, TCP_CORK,
+ (char *)&val, sizeof(val));
+ set_fs(oldfs);
+ return ret;
+}
+
+static inline int /* Turn TCP_CORK on (val = 1); returns xprt_set_cork_sock()'s result */
+xprt_cork_sock(struct socket *sock)
+{
+ return xprt_set_cork_sock(sock, 1);
+}
+
+static inline void /* Turn TCP_CORK off (val = 0); the result of xprt_set_cork_sock() is discarded */
+xprt_uncork_sock(struct socket *sock)
+{
+ xprt_set_cork_sock(sock, 0);
+}
+
+/* Send the XDR buffer (head iovec, then page array, then tail iovec) using the zero copy socket API, skipping the first 'base' bytes. Returns the number of bytes sent if any were sent, otherwise the result of the failing call. */
+static int
+xdr_sendpages(struct socket *sock, struct xdr_buf *xdr, size_t base)
+{
+ struct iovec iov; /* single-entry iovec reused for the head and the tail */
+ struct page **ppage = xdr->pages;
+ unsigned int len, pglen = xdr->page_len;
+ int err, copied = 0;
+
+ if ((err = xprt_cork_sock(sock)) < 0) /* cork first; both exits below uncork again */
+ return err;
+ len = xdr->head[0].iov_len;
+ if (base < len) { /* the send starts inside the head */
+ len -= base;
+ iov.iov_len = len;
+ iov.iov_base = (char *)xdr->head[0].iov_base + base;
+ err = sock_sendkerneliovec(sock, NULL, 0, &iov, 1, len);
+ if (err > 0)
+ copied += err;
+ if (err != len) /* error or short send: stop and report what went out */
+ goto out_err;
+ base = 0;
+ } else
+ base -= len; /* head already sent; make base relative to the page data */
+ if (base >= pglen) { /* page data already sent, or there is none */
+ base -= pglen; /* make base relative to the tail */
+ goto send_tail;
+ }
+ if (base || xdr->page_base) {
+ pglen -= base;
+ base += xdr->page_base;
+ ppage += base >> PAGE_CACHE_SHIFT; /* advance to the first page still to be sent */
+ base &= ~PAGE_CACHE_MASK; /* presumably leaves the offset within that page - TODO confirm mask polarity */
+ }
+ do {
+ len = PAGE_CACHE_SIZE;
+ if (base)
+ len -= base;
+ if (pglen < len)
+ len = pglen;
+ err = sock->ops->sendpage(sock, *ppage, base, len, MSG_DONTWAIT|MSG_NOSIGNAL); /* same flags as sock_sendkerneliovec(), so the page path suppresses signals like the head/tail sends */
+ if (err > 0)
+ copied += err;
+ if (err != len)
+ goto out_err;
+ base = 0; /* every later page is sent from offset 0 */
+ ppage++;
+ } while ((pglen -= len) != 0);
+send_tail:
+ len = xdr->tail[0].iov_len;
+ if (len && base < len) { /* skip an empty or already-sent tail */
+ len -= base;
+ iov.iov_len = len;
+ iov.iov_base = (char *)xdr->tail[0].iov_base + base;
+ err = sock_sendkerneliovec(sock, NULL, 0, &iov, 1, len);
+ if (err > 0)
+ copied += err;
+ if (err != len)
+ goto out_err;
+ }
+ xprt_uncork_sock(sock);
+ return copied;
+out_err:
+ xprt_uncork_sock(sock);
+ return copied != 0 ? copied : err; /* report progress if any, else the failing result */
+}
+
 /*
  * Write data to socket.
  */
@@ -175,11 +291,8 @@
 xprt_sendmsg(struct rpc_xprt *xprt, struct rpc_rqst *req)
 {
         struct socket *sock = xprt->sock;
- struct msghdr msg;
         struct xdr_buf *xdr = &req->rq_snd_buf;
- struct iovec niv[MAX_IOVEC];
- unsigned int niov, slen, skip;
- mm_segment_t oldfs;
+ unsigned int slen, skip;
         int result;
 
         if (!sock)
@@ -192,21 +305,16 @@
         /* Dont repeat bytes */
         skip = req->rq_bytes_sent;
         slen = xdr->len - skip;
- niov = xdr_kmap(niv, xdr, skip);
-
- msg.msg_flags = MSG_DONTWAIT|MSG_NOSIGNAL;
- msg.msg_iov = niv;
- msg.msg_iovlen = niov;
- msg.msg_name = (struct sockaddr *) &xprt->addr;
- msg.msg_namelen = sizeof(xprt->addr);
- msg.msg_control = NULL;
- msg.msg_controllen = 0;
-
- oldfs = get_fs(); set_fs(get_ds());
- result = sock_sendmsg(sock, &msg, slen);
- set_fs(oldfs);
 
- xdr_kunmap(xdr, skip);
+ if (xdr->page_len == 0 || !xprt->stream) {
+ struct iovec niv[MAX_IOVEC];
+ unsigned int niov;
+ niov = xdr_kmap(niv, xdr, skip);
+ result = sock_sendkerneliovec(sock, (struct sockaddr *)&xprt->addr,
+ sizeof(xprt->addr), niv, niov, slen);
+ xdr_kunmap(xdr, skip);
+ } else
+ result = xdr_sendpages(sock, xdr, skip);
 
         dprintk("RPC: xprt_sendmsg(%d) = %d\n", slen, result);
 
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Sun Jun 23 2002 - 22:00:11 EST