<!-- MHonArc v2.6.6 -->
<!--X-Subject: [PATCH 49/62] ocfs2:  Reconnect after idle time out. -->
<!--X-From-R13: [nex Tnfuru <zsnfuruNfhfr.pbz> -->
<!--X-Date: Wed,  2 Apr 2008 16:36:51 &#45;0400 (EDT) -->
<!--X-Message-Id: 12071674213882&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Content-Type: text/plain -->
<!--X-Reference: 12071673121124&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 1207167318479&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 12071673203909&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 12071673232465&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 12071673263948&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 12071673282579&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 1207167330482&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 12071673321172&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 12071673341888&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 12071673362069&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 12071673391031&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 12071673413492&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 1207167343926&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 12071673461268&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 12071673481928&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 12071673492775&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 12071673511262&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 12071673543752&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 1207167355295&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 12071673571757&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 12071673591158&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 12071673613757&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 1207167363119&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 12071673661674&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 12071673681650&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 12071673701937&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 12071673722691&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 12071673731276&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 12071673753788&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 12071673772233&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 1207167379283&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 12071673821008&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 12071673843208&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 12071673872568&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 12071673882492&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 1207167390414&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 1207167392537&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 12071673941659&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 12071673964014&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 1207167398684&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 12071674013954&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 12071674033903&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 12071674061934&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 12071674083120&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 12071674103432&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 1207167413855&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 12071674151752&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 12071674171596&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Reference: 12071674193441&#45;git&#45;send&#45;email&#45;mfasheh@suse.com -->
<!--X-Head-End-->
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"
                      "http://www.w3.org/TR/REC-html40/loose.dtd">
<html lang="en">
<head>
<meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-2">
<title>Linux-Kernel Archive: [PATCH 49/62] ocfs2:  Reconnect after idle time out.</title>
<meta NAME="Author" CONTENT="Mark Fasheh &lt;mfasheh@suse.com&gt;">
<meta NAME="Subject" CONTENT="[PATCH 49/62] ocfs2:  Reconnect after idle time out.">
</head> 
<body BGCOLOR="#FFFFFF" TEXT="#000000">

<!--X-Body-Begin-->
<!--X-User-Header-->
<!--X-User-Header-End-->
<!--X-TopPNI-->


<!--X-TopPNI-End-->
<!--X-MsgBody-->
<!--X-Subject-Header-Begin-->
<h1>[PATCH 49/62] ocfs2:  Reconnect after idle time out.</h1>
<strong>From: </strong>Mark Fasheh
<br><strong>Date: </strong> Wed Apr 02 2008 - 16:36:51 EDT
<p>
<ul>
<li><strong>Next message: </strong> <a href="0944.html"> Mark Fasheh: "[PATCH 50/62] sysfs: Allow removal of symlinks in the sysfs root"</a>

<li><strong>Previous message: </strong> <a href="0942.html"> Mark Fasheh: "[PATCH 48/62] ocfs2/dlm: Cleanup lockres print"</a>

<li><strong>In reply to: </strong> <a href="0942.html"> Mark Fasheh: "[PATCH 48/62] ocfs2/dlm: Cleanup lockres print"</a>

<li><strong>Next in thread: </strong> <a href="0944.html"> Mark Fasheh: "[PATCH 50/62] sysfs: Allow removal of symlinks in the sysfs root"</a>

<li><strong>Messages sorted by: </strong><a href="date.html#0943">[ date ]</a> <a href="index.html#0943">[ thread ]</a> <a href="subject.html#0943">[ subject ]</a> <a href="author.html#0943">[ author ]</a>
</ul>

<!--X-Subject-Header-End-->
<!--X-Head-of-Message-->
<!--X-Head-of-Message-End-->
<!--X-Head-Body-Sep-Begin-->
<hr NOSHADE>
<!--X-Head-Body-Sep-End-->
<!--X-Body-of-Message-->
From: Tao Ma &lt;tao.ma@xxxxxxxxxx&gt;<br>
<br>
Currently, o2net connects to a node on hb_up and disconnects on<br>
hb_down and net timeout.<br>
<br>
Disconnecting on net timeout is ok, but it should attempt to<br>
reconnect back. This is because sometimes nodes get overloaded<br>
enough that the network connection breaks but the disk hb does not.<br>
And if we get into that situation, we either fence (unnecessarily)<br>
or wait for its disk hb to die (and sometimes hang in the process).<br>
<br>
So in this updated scheme, when the network disconnects, we keep<br>
attempting to reconnect till we succeed or we get a disk hb down<br>
event.<br>
<br>
If the other node is really dead, then we will eventually get a<br>
node down event. If not, we should be able to connect again and<br>
continue.<br>
<br>
Signed-off-by: Tao Ma &lt;tao.ma@xxxxxxxxxx&gt;<br>
Signed-off-by: Mark Fasheh &lt;mfasheh@xxxxxxxx&gt;<br>
---<br>
 fs/ocfs2/cluster/tcp.c          |   51 +++++++++++++++++++++++++++-----------<br>
 fs/ocfs2/cluster/tcp_internal.h |    2 +<br>
 2 files changed, 38 insertions(+), 15 deletions(-)<br>
<br>
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c<br>
index b8057c5..4ea4b0a 100644<br>
--- a/fs/ocfs2/cluster/tcp.c<br>
+++ b/fs/ocfs2/cluster/tcp.c<br>
@@ -399,8 +399,6 @@ static void o2net_set_nn_state(struct o2net_node *nn,<br>
 	mlog_bug_on_msg(err &amp;&amp; valid, &quot;err %d valid %u\n&quot;, err, valid);<br>
 	mlog_bug_on_msg(valid &amp;&amp; !sc, &quot;valid %u sc %p\n&quot;, valid, sc);<br>
 <br>
-	/* we won't reconnect after our valid conn goes away for<br>
-	 * this hb iteration.. here so it shows up in the logs */<br>
 	if (was_valid &amp;&amp; !valid &amp;&amp; err == 0)<br>
 		err = -ENOTCONN;<br>
 <br>
@@ -430,11 +428,6 @@ static void o2net_set_nn_state(struct o2net_node *nn,<br>
 <br>
 	if (!was_valid &amp;&amp; valid) {<br>
 		o2quo_conn_up(o2net_num_from_nn(nn));<br>
-		/* this is a bit of a hack.  we only try reconnecting<br>
-		 * when heartbeating starts until we get a connection.<br>
-		 * if that connection then dies we don't try reconnecting.<br>
-		 * the only way to start connecting again is to down<br>
-		 * heartbeat and bring it back up. */<br>
 		cancel_delayed_work(&amp;nn-&gt;nn_connect_expired);<br>
 		printk(KERN_INFO &quot;o2net: %s &quot; SC_NODEF_FMT &quot;\n&quot;,<br>
 		       o2nm_this_node() &gt; sc-&gt;sc_node-&gt;nd_num ?<br>
@@ -457,6 +450,18 @@ static void o2net_set_nn_state(struct o2net_node *nn,<br>
 			delay = 0;<br>
 		mlog(ML_CONN, &quot;queueing conn attempt in %lu jiffies\n&quot;, delay);<br>
 		queue_delayed_work(o2net_wq, &amp;nn-&gt;nn_connect_work, delay);<br>
+<br>
+		/*<br>
+		 * Delay the expired work after idle timeout.<br>
+		 *<br>
+		 * We might have lots of failed connection attempts that run<br>
+		 * through here but we only cancel the connect_expired work when<br>
+		 * a connection attempt succeeds.  So only the first enqueue of<br>
+		 * the connect_expired work will do anything.  The rest will see<br>
+		 * that it's already queued and do nothing.<br>
+		 */<br>
+		delay += msecs_to_jiffies(o2net_idle_timeout(NULL));<br>
+		queue_delayed_work(o2net_wq, &amp;nn-&gt;nn_connect_expired, delay);<br>
 	}<br>
 <br>
 	/* keep track of the nn's sc ref for the caller */<br>
@@ -1193,6 +1198,7 @@ static int o2net_check_handshake(struct o2net_sock_container *sc)<br>
 	 * shut down already */<br>
 	if (nn-&gt;nn_sc == sc) {<br>
 		o2net_sc_reset_idle_timer(sc);<br>
+		atomic_set(&amp;nn-&gt;nn_timeout, 0);<br>
 		o2net_set_nn_state(nn, sc, 1, 0);<br>
 	}<br>
 	spin_unlock(&amp;nn-&gt;nn_lock);<br>
@@ -1391,6 +1397,7 @@ static void o2net_sc_send_keep_req(struct work_struct *work)<br>
 static void o2net_idle_timer(unsigned long data)<br>
 {<br>
 	struct o2net_sock_container *sc = (struct o2net_sock_container *)data;<br>
+	struct o2net_node *nn = o2net_nn_from_num(sc-&gt;sc_node-&gt;nd_num);<br>
 	struct timeval now;<br>
 <br>
 	do_gettimeofday(&amp;now);<br>
@@ -1413,6 +1420,12 @@ static void o2net_idle_timer(unsigned long data)<br>
 	     sc-&gt;sc_tv_func_start.tv_sec, (long) sc-&gt;sc_tv_func_start.tv_usec,<br>
 	     sc-&gt;sc_tv_func_stop.tv_sec, (long) sc-&gt;sc_tv_func_stop.tv_usec);<br>
 <br>
+	/*<br>
+	 * Initialize the nn_timeout so that the next connection attempt<br>
+	 * will continue in o2net_start_connect.<br>
+	 */<br>
+	atomic_set(&amp;nn-&gt;nn_timeout, 1);<br>
+<br>
 	o2net_sc_queue_work(sc, &amp;sc-&gt;sc_shutdown_work);<br>
 }<br>
 <br>
@@ -1447,6 +1460,7 @@ static void o2net_start_connect(struct work_struct *work)<br>
 	struct socket *sock = NULL;<br>
 	struct sockaddr_in myaddr = {0, }, remoteaddr = {0, };<br>
 	int ret = 0, stop;<br>
+	unsigned int timeout;<br>
 <br>
 	/* if we're greater we initiate tx, otherwise we accept */<br>
 	if (o2nm_this_node() &lt;= o2net_num_from_nn(nn))<br>
@@ -1466,8 +1480,17 @@ static void o2net_start_connect(struct work_struct *work)<br>
 	}<br>
 <br>
 	spin_lock(&amp;nn-&gt;nn_lock);<br>
-	/* see if we already have one pending or have given up */<br>
-	stop = (nn-&gt;nn_sc || nn-&gt;nn_persistent_error);<br>
+	/*<br>
+	 * see if we already have one pending or have given up.<br>
+	 * For nn_timeout, it is set when we close the connection<br>
+	 * because of the idle time out. So it means that we have<br>
+	 * at least connected to that node successfully once,<br>
+	 * now try to connect to it again.<br>
+	 */<br>
+	timeout = atomic_read(&amp;nn-&gt;nn_timeout);<br>
+	stop = (nn-&gt;nn_sc ||<br>
+		(nn-&gt;nn_persistent_error &amp;&amp;<br>
+		(nn-&gt;nn_persistent_error != -ENOTCONN || timeout == 0)));<br>
 	spin_unlock(&amp;nn-&gt;nn_lock);<br>
 	if (stop)<br>
 		goto out;<br>
@@ -1579,6 +1602,7 @@ void o2net_disconnect_node(struct o2nm_node *node)<br>
 <br>
 	/* don't reconnect until it's heartbeating again */<br>
 	spin_lock(&amp;nn-&gt;nn_lock);<br>
+	atomic_set(&amp;nn-&gt;nn_timeout, 0);<br>
 	o2net_set_nn_state(nn, NULL, 0, -ENOTCONN);<br>
 	spin_unlock(&amp;nn-&gt;nn_lock);<br>
 <br>
@@ -1613,17 +1637,12 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num,<br>
 		(msecs_to_jiffies(o2net_reconnect_delay(node)) + 1);<br>
 <br>
 	if (node_num != o2nm_this_node()) {<br>
-		/* heartbeat doesn't work unless a local node number is<br>
-		 * configured and doing so brings up the o2net_wq, so we can<br>
-		 * use it.. */<br>
-		queue_delayed_work(o2net_wq, &amp;nn-&gt;nn_connect_expired,<br>
-		                   msecs_to_jiffies(o2net_idle_timeout(node)));<br>
-<br>
 		/* believe it or not, accept and node hearbeating testing<br>
 		 * can succeed for this node before we got here.. so<br>
 		 * only use set_nn_state to clear the persistent error<br>
 		 * if that hasn't already happened */<br>
 		spin_lock(&amp;nn-&gt;nn_lock);<br>
+		atomic_set(&amp;nn-&gt;nn_timeout, 0);<br>
 		if (nn-&gt;nn_persistent_error)<br>
 			o2net_set_nn_state(nn, NULL, 0, 0);<br>
 		spin_unlock(&amp;nn-&gt;nn_lock);<br>
@@ -1747,6 +1766,7 @@ static int o2net_accept_one(struct socket *sock)<br>
 	new_sock = NULL;<br>
 <br>
 	spin_lock(&amp;nn-&gt;nn_lock);<br>
+	atomic_set(&amp;nn-&gt;nn_timeout, 0);<br>
 	o2net_set_nn_state(nn, sc, 0, 0);<br>
 	spin_unlock(&amp;nn-&gt;nn_lock);<br>
 <br>
@@ -1941,6 +1961,7 @@ int o2net_init(void)<br>
 	for (i = 0; i &lt; ARRAY_SIZE(o2net_nodes); i++) {<br>
 		struct o2net_node *nn = o2net_nn_from_num(i);<br>
 <br>
+		atomic_set(&amp;nn-&gt;nn_timeout, 0);<br>
 		spin_lock_init(&amp;nn-&gt;nn_lock);<br>
 		INIT_DELAYED_WORK(&amp;nn-&gt;nn_connect_work, o2net_start_connect);<br>
 		INIT_DELAYED_WORK(&amp;nn-&gt;nn_connect_expired,<br>
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h<br>
index d25b9af..b4c5586 100644<br>
--- a/fs/ocfs2/cluster/tcp_internal.h<br>
+++ b/fs/ocfs2/cluster/tcp_internal.h<br>
@@ -95,6 +95,8 @@ struct o2net_node {<br>
 	unsigned			nn_sc_valid:1;<br>
 	/* if this is set tx just returns it */<br>
 	int				nn_persistent_error;<br>
+	/* It is only set to 1 after the idle time out. */<br>
+	atomic_t			nn_timeout;<br>
 <br>
 	/* threads waiting for an sc to arrive wait on the wq for generation<br>
 	 * to increase.  it is increased when a connecting socket succeeds<br>
-- <br>
1.5.4.1<br>
<br>
--<br>
To unsubscribe from this list: send the line &quot;unsubscribe linux-kernel&quot; in<br>
the body of a message to majordomo@xxxxxxxxxxxxxxx<br>
More majordomo info at  <a  href="http://vger.kernel.org/majordomo-info.html">http://vger.kernel.org/majordomo-info.html</a><br>
Please read the FAQ at  <a  href="http://www.tux.org/lkml/">http://www.tux.org/lkml/</a><br>
<br>
<br>

<!--X-Body-of-Message-End-->
<!--X-MsgBody-End-->
<!--X-Follow-Ups-->
<hr NOSHADE>


<!--X-Follow-Ups-End-->
<!--X-References-->


<!--X-References-End-->
<!--X-BotPNI-->
<ul>
<li><strong>Next message: </strong> <a href="0944.html"> Mark Fasheh: "[PATCH 50/62] sysfs: Allow removal of symlinks in the sysfs root"</a>

<li><strong>Previous message: </strong> <a href="0942.html"> Mark Fasheh: "[PATCH 48/62] ocfs2/dlm: Cleanup lockres print"</a>

<li><strong>In reply to: </strong> <a href="0942.html"> Mark Fasheh: "[PATCH 48/62] ocfs2/dlm: Cleanup lockres print"</a>

<li><strong>Next in thread: </strong> <a href="0944.html"> Mark Fasheh: "[PATCH 50/62] sysfs: Allow removal of symlinks in the sysfs root"</a>

<li><strong>Messages sorted by: </strong><a href="date.html#0943">[ date ]</a> <a href="index.html#0943">[ thread ]</a> <a href="subject.html#0943">[ subject ]</a> <a href="author.html#0943">[ author ]</a>
</ul>

<!--X-BotPNI-End-->
<!--X-User-Footer-->
<!--X-User-Footer-End-->
</body>
</html>