[patch 48/48] SUNRPC: Fix tcp reconnection

From: Greg KH
Date: Fri Sep 04 2009 - 16:15:16 EST



2.6.27-stable review patch. If anyone has any objections, please let us know.

------------------
From: Trond Myklebust <Trond.Myklebust@xxxxxxxxxx>

This fixes a problem that was reported as Red Hat Bugzilla entry number
485339, in which rpciod starts looping on the TCP connection code,
rendering the NFS client unusable for 1/2 minute or so.

It is basically a backport of commit
f75e6745aa3084124ae1434fd7629853bdaf6798 (SUNRPC: Fix the problem of
EADDRNOTAVAIL syslog floods on reconnect)

Signed-off-by: Trond Myklebust <Trond.Myklebust@xxxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxx>


---
include/linux/sunrpc/xprt.h | 1 +
net/sunrpc/xprt.c | 6 ++----
net/sunrpc/xprtsock.c | 37 ++++++++++++++++++++++++++++++++++---
3 files changed, 37 insertions(+), 7 deletions(-)

--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -260,6 +260,7 @@ void xprt_conditional_disconnect(struc
#define XPRT_BOUND (4)
#define XPRT_BINDING (5)
#define XPRT_CLOSING (6)
+#define XPRT_CONNECTION_CLOSE (8)

static inline void xprt_set_connected(struct rpc_xprt *xprt)
{
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -645,10 +645,8 @@ xprt_init_autodisconnect(unsigned long d
if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
goto out_abort;
spin_unlock(&xprt->transport_lock);
- if (xprt_connecting(xprt))
- xprt_release_write(xprt, NULL);
- else
- queue_work(rpciod_workqueue, &xprt->task_cleanup);
+ set_bit(XPRT_CONNECTION_CLOSE, &xprt->state);
+ queue_work(rpciod_workqueue, &xprt->task_cleanup);
return;
out_abort:
spin_unlock(&xprt->transport_lock);
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -748,6 +748,9 @@ out_release:
*
* This is used when all requests are complete; ie, no DRC state remains
* on the server we want to save.
+ *
+ * The caller _must_ be holding XPRT_LOCKED in order to avoid issues with
+ * xs_reset_transport() zeroing the socket from underneath a writer.
*/
static void xs_close(struct rpc_xprt *xprt)
{
@@ -781,6 +784,14 @@ clear_close_wait:
xprt_disconnect_done(xprt);
}

+static void xs_tcp_close(struct rpc_xprt *xprt)
+{
+ if (test_and_clear_bit(XPRT_CONNECTION_CLOSE, &xprt->state))
+ xs_close(xprt);
+ else
+ xs_tcp_shutdown(xprt);
+}
+
/**
* xs_destroy - prepare to shutdown a transport
* @xprt: doomed transport
@@ -1676,11 +1687,21 @@ static void xs_tcp_connect_worker4(struc
goto out_clear;
case -ECONNREFUSED:
case -ECONNRESET:
+ case -ENETUNREACH:
/* retry with existing socket, after a delay */
- break;
+ goto out_clear;
default:
/* get rid of existing socket, and retry */
xs_tcp_shutdown(xprt);
+ printk("%s: connect returned unhandled error %d\n",
+ __func__, status);
+ case -EADDRNOTAVAIL:
+ /* We're probably in TIME_WAIT. Get rid of existing socket,
+ * and retry
+ */
+ set_bit(XPRT_CONNECTION_CLOSE, &xprt->state);
+ xprt_force_disconnect(xprt);
+ status = -EAGAIN;
}
}
out:
@@ -1735,11 +1756,21 @@ static void xs_tcp_connect_worker6(struc
goto out_clear;
case -ECONNREFUSED:
case -ECONNRESET:
+ case -ENETUNREACH:
/* retry with existing socket, after a delay */
- break;
+ goto out_clear;
default:
/* get rid of existing socket, and retry */
xs_tcp_shutdown(xprt);
+ printk("%s: connect returned unhandled error %d\n",
+ __func__, status);
+ case -EADDRNOTAVAIL:
+ /* We're probably in TIME_WAIT. Get rid of existing socket,
+ * and retry
+ */
+ set_bit(XPRT_CONNECTION_CLOSE, &xprt->state);
+ xprt_force_disconnect(xprt);
+ status = -EAGAIN;
}
}
out:
@@ -1871,7 +1902,7 @@ static struct rpc_xprt_ops xs_tcp_ops =
.buf_free = rpc_free,
.send_request = xs_tcp_send_request,
.set_retrans_timeout = xprt_set_retrans_timeout_def,
- .close = xs_tcp_shutdown,
+ .close = xs_tcp_close,
.destroy = xs_destroy,
.print_stats = xs_tcp_print_stats,
};


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/