I have a small program to check socket performance (pipe, UDP and TCP),
and the 2.1.x kernels (up to 2.1.113; I haven't tested newer ones yet, but I
don't expect a difference) show strange behaviour for TCP with block
sizes between 32 and 512 bytes. Here is the output (on the loopback
device, of course! I don't have Gigabit Ethernet yet ;-):
chunks size
tcp ( 4096* 1): user=0.00, system=0.03, elapsed=0.05, CPU=58%, MB/s=0.152359
tcp ( 4096* 2): user=0.01, system=0.02, elapsed=0.04, CPU=82%, MB/s=0.430726
tcp ( 4096* 4): user=0.00, system=0.03, elapsed=0.04, CPU=69%, MB/s=0.725192
tcp ( 4096* 8): user=0.00, system=0.03, elapsed=0.04, CPU=70%, MB/s=1.475414
tcp ( 4096* 16): user=0.01, system=0.03, elapsed=0.04, CPU=93%, MB/s=2.926786
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
tcp ( 4096* 32): user=0.00, system=0.03, elapsed=0.35, CPU=8%, MB/s=0.719640
tcp ( 4096* 64): user=0.00, system=0.05, elapsed=0.68, CPU=7%, MB/s=0.731515
tcp ( 4096* 128): user=0.00, system=0.09, elapsed=1.33, CPU=6%, MB/s=0.750620
tcp ( 4096* 256): user=0.00, system=0.01, elapsed=3.42, CPU=0%, MB/s=0.584337
tcp ( 4096* 512): user=0.00, system=0.01, elapsed=4.86, CPU=0%, MB/s=0.823260
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
tcp ( 4096*1024): user=0.02, system=0.30, elapsed=0.33, CPU=95%, MB/s=23.976859
tcp ( 4096*2048): user=0.03, system=0.53, elapsed=0.57, CPU=98%, MB/s=28.135034
tcp ( 4096*4096): user=0.01, system=1.03, elapsed=1.08, CPU=96%, MB/s=29.631713
tcp ( 4096*8192): user=0.01, system=1.69, elapsed=1.71, CPU=99%, MB/s=37.463620
I tried different chunk counts, and this shifts the window in which the
strange behaviour occurs; e.g. with 1024 chunks, I get
tcp ( 1024* 1): user=0.00, system=0.00, elapsed=0.02, CPU=0%, MB/s=0.095681
tcp ( 1024* 2): user=0.00, system=0.00, elapsed=0.02, CPU=0%, MB/s=0.195059
tcp ( 1024* 4): user=0.00, system=0.00, elapsed=0.05, CPU=0%, MB/s=0.166174
tcp ( 1024* 8): user=0.00, system=0.00, elapsed=0.02, CPU=0%, MB/s=0.664047
tcp ( 1024* 16): user=0.00, system=0.00, elapsed=0.02, CPU=0%, MB/s=1.541456
tcp ( 1024* 32): user=0.00, system=0.00, elapsed=0.02, CPU=0%, MB/s=2.995009
tcp ( 1024* 64): user=0.00, system=0.01, elapsed=0.04, CPU=27%, MB/s=3.465006
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
tcp ( 1024* 128): user=0.00, system=0.01, elapsed=0.33, CPU=3%, MB/s=0.753128
tcp ( 1024* 256): user=0.00, system=0.00, elapsed=0.66, CPU=0%, MB/s=0.759937
tcp ( 1024* 512): user=0.00, system=0.00, elapsed=1.12, CPU=0%, MB/s=0.891513
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
tcp ( 1024*1024): user=0.00, system=0.07, elapsed=0.09, CPU=80%, MB/s=22.969503
tcp ( 1024*2048): user=0.00, system=0.12, elapsed=0.14, CPU=85%, MB/s=28.614152
tcp ( 1024*4096): user=0.00, system=0.22, elapsed=0.26, CPU=85%, MB/s=30.914648
tcp ( 1024*8192): user=0.02, system=0.40, elapsed=0.42, CPU=99%, MB/s=37.801378
So it seems to be connected with a 128K limit: the slowdown starts at
4096*32 = 128K in the first run and at 1024*128 = 128K in the second. (I
tried other values, and indeed, once you fill 128K with small chunks, the
kernel seems compelled to wait.)
Is this a bug or a feature? Does anybody have an idea why there is a 128K
limit, and why it doesn't matter once the chunk size is 1024 bytes or larger?
I don't think it affects real-world benchmarks, but who knows.
I appended the performance check program, so you can test the effect
yourself.
Bernd Paysan
"Late answers are wrong answers!"
http://www.jwdt.com/~paysan/
------------------------------socktest.c-----------------------------------
/* socktest from Bernd Paysan <bernd.paysan@gmx.de>
* available under GPL
*/
#include <sys/time.h>
#include <sys/types.h>
#include <sys/select.h>
#include <sys/socket.h>
#include <sys/resource.h>
#include <sys/wait.h>
#include <netinet/in.h>
#include <arpa/inet.h>
/* Kept from the original; listed after <netinet/in.h> so that the libc
   definitions of struct sockaddr_in and friends are seen first. */
#include <linux/in.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <limits.h>
#include <unistd.h>
#include <signal.h>
/* Convert a struct timeval (seconds + microseconds) to seconds as a double. */
#define TIME(x) ((double)(x).tv_sec + 1e-6 * (double)(x).tv_usec)
/* Byte threshold used by the UDP reader/writer hand-shake in main(). */
#define BIGGEST 4096
/*
 * Return the current time of day, as reported by gettimeofday(), in
 * seconds as a double (seconds plus microseconds * 1e-6).
 *
 * Returns 0.0 if gettimeofday() reports failure, so the caller never sees
 * an uninitialised value.
 */
double gettime(void)
{
    struct timeval now;

    /* The timezone argument is obsolete and POSIX leaves the behaviour
       unspecified when it is non-NULL, so NULL is passed instead of a
       dummy struct timezone. */
    if (gettimeofday(&now, NULL) != 0)
        return 0.0;

    return (double)now.tv_sec + (double)now.tv_usec * 1e-6;
}
/* Print the command-line synopsis to stderr and exit with status 1. */
static void usage(void)
{
    fprintf(stderr, "usage: socktest unix/tcp/udp n m\n");
    exit(1);
}

/* Report a failed call through perror() and exit with status 1. */
static void die(const char *what)
{
    perror(what);
    exit(1);
}

/* Parse a strictly positive decimal int; anything else prints the usage. */
static int parse_positive(const char *text)
{
    char *end;
    long value;

    errno = 0;
    value = strtol(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE ||
        value <= 0 || value > INT_MAX)
        usage();
    return (int)value;
}

/* Bind fd to 127.0.0.1 with a kernel-chosen port and store the resulting
   address (including the assigned port) in *addr. */
static void bind_loopback(int fd, struct sockaddr_in *addr)
{
    socklen_t len = sizeof *addr;

    memset(addr, 0, sizeof *addr);
    addr->sin_family = AF_INET;
    addr->sin_port = htons(0);
    addr->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
    if (bind(fd, (struct sockaddr *)addr, sizeof *addr) < 0)
        die("bind");
    if (getsockname(fd, (struct sockaddr *)addr, &len) < 0)
        die("getsockname");
}

/* Create a connected TCP pair over loopback: sv[0] is the connecting
   socket, sv[1] the accepted one. */
static void open_tcp_pair(int *sv)
{
    struct sockaddr_in client_addr, server_addr;
    int client, listener, accepted;

    client = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
    listener = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
    if (client < 0 || listener < 0)
        die("socket");
    bind_loopback(client, &client_addr);
    bind_loopback(listener, &server_addr);
    if (listen(listener, 5) < 0)
        die("listen");
    if (connect(client, (struct sockaddr *)&server_addr,
                sizeof server_addr) < 0)
        die("connect");
    accepted = accept(listener, NULL, NULL);
    if (accepted < 0)
        die("accept");
    close(listener);
    sv[0] = client;
    sv[1] = accepted;
}

/* Create two UDP sockets on loopback, with sv[1] connected to sv[0]'s
   address, plus a UNIX stream pair in sv[2]/sv[3] used as the control
   channel between reader and writer. */
static void open_udp_pair(int *sv)
{
    struct sockaddr_in recv_addr, send_addr;

    sv[0] = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
    sv[1] = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
    if (sv[0] < 0 || sv[1] < 0)
        die("socket");
    bind_loopback(sv[0], &recv_addr);
    bind_loopback(sv[1], &send_addr);
    if (connect(sv[1], (struct sockaddr *)&recv_addr, sizeof recv_addr) < 0)
        die("connect");
    if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv + 2) < 0)
        die("socketpair");
}

/*
 * Reader child: read from sv[0] in requests of `size` bytes until `total`
 * bytes have arrived, then exit(0).  Never returns.
 *
 * In UDP mode one byte is written to sv[2] whenever more than 256 reads
 * or more than BIGGEST bytes have passed since the last such write; the
 * writer blocks on the matching read (presumably to keep it from
 * overrunning the receiver -- confirm).  The mark starts at -1 here and at
 * 0 in the writer, exactly as in the original code.
 */
static void run_reader(const int *sv, char *buffer, int total, int size,
                       int udp)
{
    int mark = -1;
    int done = 0;
    int rounds = 0;

    /* Drop the writer's data endpoint so a dead writer yields EOF on a
       stream socket instead of a hang.  The control pair is deliberately
       left open everywhere, as in the original. */
    close(sv[1]);

    while (done < total) {
        ssize_t got;

        if (udp && (rounds > 256 || done - mark > BIGGEST)) {
            if (write(sv[2], ".", 1) != 1)
                die("control write");
            mark += BIGGEST;
            rounds = 0;
        }
        got = read(sv[0], buffer, (size_t)size);
        if (got < 0) {
            int err = errno;    /* fprintf may clobber errno */

            fprintf(stderr, "read error %d@%d\n", err, done);
            exit(err);
        }
        if (got == 0) {
            /* Without this check the byte count would never advance. */
            fprintf(stderr, "read error: unexpected end of stream@%d\n",
                    done);
            exit(1);
        }
        done += (int)got;
        rounds++;
    }
    exit(0);
}

/*
 * Writer child: fill the buffer with a byte pattern and write it to sv[1]
 * in requests of `size` bytes until `total` bytes have been sent, then
 * exit(0).  Never returns.  In UDP mode it blocks reading one byte from
 * sv[3] under the same conditions the reader uses for its control write.
 */
static void run_writer(const int *sv, char *buffer, int total, int size,
                       int udp)
{
    struct timeval no_wait = { 0, 0 };
    char credit[1];
    int mark = 0;
    int done = 0;
    int rounds = 0;
    int k;

    close(sv[0]);

    /* Zero-timeout select kept from the original; it returns at once. */
    select(0, NULL, NULL, NULL, &no_wait);

    for (k = 0; k < size; k++)
        buffer[k] = (char)k;

    while (done < total) {
        ssize_t put;

        if (udp && (rounds > 256 || done - mark > BIGGEST)) {
            if (read(sv[3], credit, 1) != 1)
                die("control read");
            mark += BIGGEST;
            rounds = 0;
        }
        put = write(sv[1], buffer, (size_t)size);
        if (put < 0) {
            int err = errno;    /* fprintf may clobber errno */

            fprintf(stderr, "write error %d@%d\n", err, done);
            exit(err);
        }
        done += (int)put;
        rounds++;
    }
    exit(0);
}

/*
 * socktest <unix|tcp|udp> n m
 *
 * Transfers n chunks of m bytes from a writer child to a reader child over
 * the selected socket type and prints one line with the children's user
 * and system CPU time, the elapsed time, the CPU percentage and MB/s.
 *
 * Returns 0 when both children exited with status 0, otherwise 1.  Invalid
 * arguments and failed system calls terminate the program with a message
 * on stderr.
 */
int main(int argc, char **argv)
{
    int sv[4] = { -1, -1, -1, -1 };
    int udp = 0;
    int count, size, total;
    int reader_status = 0, writer_status = 0;
    pid_t reader, writer;
    char *buffer;
    struct rusage children;
    double start, end, usr, sys, tot;

    if (argc < 4)
        usage();
    count = parse_positive(argv[2]);
    size = parse_positive(argv[3]);

    /* The byte counters are ints and may reach total + size - 1. */
    if (count > (INT_MAX - size) / size) {
        fprintf(stderr, "socktest: n*m is too large\n");
        exit(1);
    }
    total = count * size;

    if (strcmp(argv[1], "unix") == 0) {
        if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0)
            die("socketpair");
    } else if (strcmp(argv[1], "tcp") == 0) {
        open_tcp_pair(sv);
    } else if (strcmp(argv[1], "udp") == 0) {
        open_udp_pair(sv);
        udp = 1;
    } else {
        usage();
    }

    /* Heap buffer instead of a VLA sized from the command line. */
    buffer = malloc((size_t)size);
    if (buffer == NULL)
        die("malloc");

    reader = fork();
    if (reader < 0)
        die("fork");
    if (reader == 0)
        run_reader(sv, buffer, total, size, udp);

    writer = fork();
    if (writer < 0)
        die("fork");
    if (writer == 0)
        run_writer(sv, buffer, total, size, udp);

    /* The parent moves no data itself. */
    close(sv[0]);
    close(sv[1]);

    /* As in the original, the clock starts only after both children have
       been forked, and the header is printed after the forks so that no
       buffered stdout data is duplicated into the children. */
    start = gettime();
    printf("%4s (%5d*%4d): ", argv[1], count, size);
    fflush(stdout);

    if (waitpid(reader, &reader_status, 0) < 0)
        die("waitpid");
    if (waitpid(writer, &writer_status, 0) < 0)
        die("waitpid");
    end = gettime();

    /* Both children have been waited for, so RUSAGE_CHILDREN holds the sum
       of their usage -- the same total the original built from two
       wait4() results. */
    if (getrusage(RUSAGE_CHILDREN, &children) < 0)
        die("getrusage");
    usr = TIME(children.ru_utime);
    sys = TIME(children.ru_stime);
    tot = end - start;

    /* The factor 2 is kept from the original (presumably counting every
       byte once for the write and once for the read -- confirm); it is
       evaluated in double so it cannot overflow int. */
    printf("user=%.2f, system=%.2f, elapsed=%.2f, CPU=%d%%, MB/s=%.6f\n",
           usr, sys, tot,
           tot > 0.0 ? (int)((usr + sys) / (tot * 1e-2)) : 0,
           tot > 0.0 ? 2.0 * total / (tot * 1024 * 1024) : 0.0);

    free(buffer);

    if (WIFEXITED(reader_status) && WEXITSTATUS(reader_status) == 0 &&
        WIFEXITED(writer_status) && WEXITSTATUS(writer_status) == 0)
        return 0;
    return 1;
}
------------------------------socktest.size--------------------------------
#!/bin/bash
# Run socktest over every transport (unix, udp, tcp) with chunk sizes from
# 1 to 8192 bytes.  The single argument is the chunk count handed to
# socktest as its "n" parameter.
if [ $# -lt 1 ]; then
    echo "usage: $0 chunks" >&2
    exit 1
fi

for sock in unix udp tcp
do
    for size in 1 2 4 8 16 32 64 128 256 512 1024 2048 4096 8192
    do
        socktest "$sock" "$1" "$size"
    done
done
---------------------------------------------------------------------------
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.altern.org/andrebalsa/doc/lkml-faq.html