2.1.21 BUG in net/ipv4/tcp_ipv4.c

Pedro Roque (roque@di.fc.ul.pt)
Mon, 27 Jan 1997 17:10:51 GMT


>>>>> "Vance" == Vance Huntley <vance@webgenesis.com> writes:

Vance> I am currently doing some web server development, and I'm
Vance> having some trouble. The server flies along just fine for
Vance> somewhere between 5 minutes and an hour, and then it comes
Vance> to a full stop. I've been working under the assumption
Vance> that my own crappy programing was getting me into a
Vance> deadlock situation, but just now I discovered something
Vance> interesting in /var/log/debug.

Any chance you can tell us more details, like the programs and options you
use?

Vance> I started stress-testing my server at 00:47, and it happily
Vance> pumped out ~320kB/s until 01:03. I killed the server
Vance> processes at 01:20, where the debug log stopped. Jan 26
Vance> 00:49:31 genji kernel: droping syn ack:20 max:20 Jan 26

The "droping syn ack" message means that either there is a bug in the kernel
or the application is not accepting any more connection requests...

If you are programming your own server, make sure you are doing accept()...

Vance> 00:50:02 genji last message repeated 4 times Jan 26
Vance> 00:51:05 genji last message repeated 20 times Jan 26
Vance> 00:51:10 genji last message repeated 2 times Jan 26

Vance> 00:51:13 genji kernel: BUG: syn_recv:socket exists Jan 26

OK, actually this one will occur naturally if the app doesn't call accept()
and the remote host sends data.
The code is slightly incorrect, though... please try the enclosed patch:

Index: net/ipv4/tcp_ipv4.c
===================================================================
RCS file: /public/cvsroot/linux/net/ipv4/tcp_ipv4.c,v
retrieving revision 1.1.1.2
diff -u -r1.1.1.2 tcp_ipv4.c
--- tcp_ipv4.c 1996/12/19 11:41:26 1.1.1.2
+++ tcp_ipv4.c 1997/01/27 16:59:00
@@ -346,7 +346,6 @@
struct tcphdr *th = (struct tcphdr*)(dp+(iph->ihl<<2));
int type = skb->h.icmph->type;
int code = skb->h.icmph->code;
- struct tcp_opt *tp;
struct sock *sk;

sk = get_sock(&tcp_prot, th->source, iph->daddr, th->dest, iph->saddr);
@@ -356,19 +355,12 @@

if (type == ICMP_SOURCE_QUENCH)
{
- /*
- * FIXME:
- * Follow BSD for now and just reduce cong_window to 1 again.
- * It is possible that we just want to reduce the
- * window by 1/2, or that we want to reduce ssthresh by 1/2
- * here as well.
- */
-
- tp = &sk->tp_pinfo.af_tcp;
+ struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;

- sk->cong_window = 1;
+ sk->ssthresh = max(sk->cong_window >> 1, 2);
+ sk->cong_window = sk->ssthresh + 3;
tp->high_seq = tp->snd_nxt;
-
+
return;
}

@@ -862,15 +854,14 @@
* as we checked the user count on tcp_rcv and we're
* running from a soft interrupt.
*/
-
+
req = tp->syn_wait_queue;
-

if (!req)
{
return sk;
}
-
+
do {
struct tcp_v4_open_req *af_req;

@@ -881,11 +872,15 @@
req->rmt_port == skb->h.th->source)
{
u32 flg;
-
+
if (req->sk)
{
- printk(KERN_DEBUG "BUG: syn_recv:"
- "socket exists\n");
+ /*
+ * socket already created but not
+ * yet accepted()...
+ */
+
+ sk = req->sk;
break;
}

@@ -896,7 +891,7 @@
*/
flg = *(((u32 *)skb->h.th) + 3);
flg &= __constant_htonl(0x002f0000);
-
+
if ((flg == __constant_htonl(0x00020000)) &&
(!after(skb->seq, req->rcv_isn)))
{
@@ -907,7 +902,6 @@
return NULL;
}

- skb_orphan(skb);
sk = tp->af_specific->syn_recv_sock(sk, skb, req);

tcp_dec_slow_timer(TCP_SLT_SYNACK);
@@ -917,48 +911,62 @@
return NULL;
}

- skb_set_owner_r(skb, sk);
req->expires = 0UL;
req->sk = sk;
break;
}

req = req->dl_next;
+
} while (req != tp->syn_wait_queue);
-

+ skb_orphan(skb);
+ skb_set_owner_r(skb, sk);
return sk;
}

-static int __inline__ tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
+int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
+
skb_set_owner_r(skb, sk);

+ /*
+ * socket locking is here for SMP purposes as backlog rcv
+ * is currently called with bh processing disabled.
+ */
+ lock_sock(sk);
+
if (sk->state == TCP_ESTABLISHED)
{
if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
goto reset;
- return 0;
+ goto ok;
}

if (sk->state == TCP_LISTEN)
{
+ struct sock *nsk;
+
/*
* find possible connection requests
*/
- sk = tcp_v4_check_req(sk, skb);

- if (sk == NULL)
+ nsk = tcp_v4_check_req(sk, skb);
+
+ if (nsk == NULL)
{
goto discard_it;
}
+
+ release_sock(sk);
+ lock_sock(nsk);
+ sk = nsk;
}
-
+
if (tcp_rcv_state_process(sk, skb, skb->h.th, NULL, skb->len) == 0)
- return 0;
+ goto ok;

reset:
-
tcp_v4_send_reset(skb);

discard_it:
@@ -966,12 +974,11 @@
* Discard frame
*/
kfree_skb(skb, FREE_READ);
- return 0;
-}
+ok:

-int __inline__ tcp_v4_backlog_rcv(struct sock *sk, struct sk_buff *skb)
-{
- return tcp_v4_do_rcv(sk, skb);
+ release_sock(sk);
+
+ return 0;
}

/*
@@ -1029,7 +1036,7 @@
skb->seq = ntohl(th->seq);
skb->end_seq = skb->seq + th->syn + th->fin + len - th->doff*4;
skb->ack_seq = ntohl(th->ack_seq);
-
+
skb->acked = 0;
skb->used = 0;

@@ -1222,17 +1229,10 @@
tcp_v4_sendmsg,
tcp_recvmsg,
NULL, /* No special bind() */
- tcp_v4_backlog_rcv,
+ tcp_v4_do_rcv,
128,
0,
"TCP",
0, 0,
NULL
};
-
-/*
- * Local variables:
- * compile-command: "gcc -D__KERNEL__ -I/usr/src/linux/include -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer -fno-strength-reduce -pipe -m486 -DCPU=486 -c -o tcp_ipv4.o tcp_ipv4.c"
- * c-file-style: "Linux"
- * End:
- */