TIME_WAIT problem in 2.0.29, is this a kernel bug ?

Carlo Wood (carlo@runaway.xs4all.nl)
Sun, 6 Apr 1997 10:46:27 +0200 (MET DST)


Hello all,

I am testing stress net.work conditions on 2.0.29.
I wrote a program that opens 16 listen ports (port 3000 through 3015,
each with a backlog of 128), connects at once 50 clients to each listen
port en then bursts 10 kilobytes through each connection.
This is successful, but the time needed to do this turned out to increase.

The first time I run it:
~/c++/libr/libr/rio/tests>burst_tst -n 800 -f
Total time used: 3 seconds and 700722 micro seconds.

And after a few times:
~/c++/libr/libr/rio/tests>burst_tst -n 800 -f
Total time used: 9 seconds and 630763 micro seconds.

The reason for this seems to be obvious:
tcp 1 0 localhost:3015 localhost:1915 TIME_WAIT
tcp 1 0 localhost:3015 localhost:1916 TIME_WAIT
tcp 1 0 localhost:3015 localhost:1917 TIME_WAIT
tcp 1 0 localhost:3015 localhost:1918 TIME_WAIT
...
tcp 1 0 localhost:3000 localhost:1176 TIME_WAIT
tcp 1 0 localhost:3000 localhost:1177 TIME_WAIT
tcp 1 0 localhost:3000 localhost:1178 TIME_WAIT
tcp 1 0 localhost:3000 localhost:1179 TIME_WAIT

The question is, why are there sockets left with TIME_WAIT ?

Here is the full strace of connecting ONE client:
(Which results in one TIME_WAIT after execution).

~/c++/libr/libr/rio/tests>strace burst_tst -n 1 -f
execve("./burst_tst", ["burst_tst", "-n", "1", "-f"], [/* 41 vars */]) = 0
[...lots of normal 'startup' junk deleted...]
gettimeofday({860315559, 450744}, NULL) = 0
socket(PF_INET, SOCK_STREAM, IPPROTO_IP) = 3
fcntl(3, F_GETFL) = 0x2 (flags O_RDWR)
fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
bind(3, {sin_family=AF_INET, sin_port=htons(3000), sin_addr=inet_addr("0.0.0.0")}, 16) = 0
listen(3, 128) = 0
open("/etc/nsswitch.conf", O_RDONLY) = 4
brk(0x805a4d4) = 0x805a4d4
fstat(4, {st_mode=S_IFREG|0644, st_size=1208, ...}) = 0
mmap(0, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x40007000
read(4, "#\n# /etc/nsswitch.conf\n#\n# An"..., 4096) = 1208
read(4, "", 4096) = 0
close(4) = 0
munmap(0x40007000, 4096) = 0
open("/etc/hosts", O_RDONLY) = 4
fstat(4, {st_mode=S_IFREG|0644, st_size=418, ...}) = 0
mmap(0, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x40007000
read(4, "127.0.0.1\tlocalhost\n192.168.1."..., 4096) = 418
lseek(4, -398, SEEK_CUR) = 20
close(4) = 0
munmap(0x40007000, 4096) = 0
socket(PF_INET, SOCK_STREAM, IPPROTO_IP) = 4
setsockopt(4, SOL_SOCKET, SO_RCVBUF, [8192], 4) = 0
setsockopt(4, SOL_SOCKET, SO_SNDBUF, [8192], 4) = 0
fcntl(4, F_GETFL) = 0x2 (flags O_RDWR)
fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0
connect(4, {sin_family=AF_INET, sin_port=htons(3000), sin_addr=inet_addr("127.0.0.1")}, 16) = -1 EINPROGRESS (Operation now in progress)
gettimeofday({860315559, 462137}, NULL) = 0
oldselect(5, [3 4], [4], [], NULL) = 2 (in [3], out [4])
gettimeofday({860315559, 463705}, NULL) = 0
write(4, "QUIT\n", 5) = 5
accept(3, {sin_family=AF_INET, sin_port=htons(1386), sin_addr=inet_addr("127.0.0.1")}, [16]) = 5
fcntl(5, F_GETFL) = 0x2 (flags O_RDWR)
fcntl(5, F_SETFL, O_RDWR|O_NONBLOCK) = 0
brk(0x805acd4) = 0x805acd4
setsockopt(5, SOL_SOCKET, SO_RCVBUF, [8192], 4) = 0
setsockopt(5, SOL_SOCKET, SO_SNDBUF, [8192], 4) = 0
brk(0x805b4d4) = 0x805b4d4
brk(0x805c4d4) = 0x805c4d4
brk(0x805e4d4) = 0x805e4d4
oldselect(6, [3 4 5], [5], [], NULL) = 2 (in [5], out [5])
gettimeofday({860315559, 467844}, NULL) = 0
write(5, "12345678901234567890123456789012"..., 2042) = 2042
write(5, "34567890123456789012345678901234"..., 2042) = 2042
write(5, "56789012345678901234567890123456"..., 4090) = 4090
write(5, "56789012345678901234567890123456"..., 2067) = 956
read(5, "QUIT\n", 506) = 5
oldselect(6, [3 4 5], [5], [], NULL) = 1 (in [4])
gettimeofday({860315559, 470693}, NULL) = 0
read(4, "12345678901234567890123456789012"..., 506) = 506
read(4, "78901234567890123456789012345678"..., 506) = 506
read(4, "34567890123456789012345678901234"..., 1018) = 1018
read(4, "123456789012", 2042) = 12
oldselect(6, [3 4 5], [5], [], NULL) = 1 (in [4])
gettimeofday({860315559, 482995}, NULL) = 0
read(4, "34567890123456789012345678901234"..., 2030) = 2030
read(4, "34567890123456789012345678901234"..., 4090) = 1514
oldselect(6, [3 4 5], [5], [], NULL) = 2 (in [4], out [5])
gettimeofday({860315559, 484988}, NULL) = 0
write(5, "12345678901234567890123456789012"..., 1111) = 1111
read(4, "78901234567890123456789012345678"..., 2576) = 2576
brk(0x805ecd4) = 0x805ecd4
read(4, "34567890123456789012345678901234"..., 8186) = 968
close(5) = 0
oldselect(5, [3 4], [], [], NULL) = 1 (in [4])
gettimeofday({860315559, 487777}, NULL) = 0
read(4, "12345678901234567890123456789012"..., 7218) = 1111
oldselect(5, [3 4], [], [], NULL) = 1 (in [4])
gettimeofday({860315559, 489222}, NULL) = 0
read(4, "", 6107) = 0
close(4) = 0
gettimeofday({860315559, 489869}, NULL) = 0
fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(3, 1), ...}) = 0
mmap(0, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x40007000
ioctl(1, TCGETS, {B9600 opost isig icanon echo ...}) = 0
write(1, "Total time used: 0 seconds and 3"..., 52) = 52
close(3) = 0
_exit(0) = ?

As you can see, all sockets - the listen socket (fd 3), the connecting
socket (fd 4) and the accepted socket (fd 5) - are successfully closed.

So why is there a TIME_WAIT left ? Is this a bug ?

Carlo

PS I am not subbed to this list, please CC me.

-- 
 carlo@runaway.xs4all.nl, Run @ IRC.