Re: [RFC][PATCH RT] rwsem_rt: Another (more sane) approach to multireader rt locks

From: Steven Rostedt
Date: Tue May 15 2012 - 12:26:21 EST


On Tue, 2012-05-15 at 10:03 -0400, Steven Rostedt wrote:

> I'll see if I can get some numbers to see how this fixes the issues with
> multi threads on big boxes.
>

I couldn't get access to the big box, so I wrote my own test. The
attached program is what I used. It creates 400 threads and maps a
10 GB memory range (with mmap). Then it runs all 400 threads, each of
which fights to read this new memory, causing lots of page faults.

I tested on a 4 CPU box with 3.4.0-rc7-rt6:

Without the patch:

map=10737418240
time = 11302617 usecs
map=10737418240
time = 11229341 usecs
map=10737418240
time = 11171463 usecs
map=10737418240
time = 11435549 usecs
map=10737418240
time = 11299086 usecs


With the patch:

map=10737418240
time = 6493796 usecs
map=10737418240
time = 6726186 usecs
map=10737418240
time = 3978194 usecs
map=10737418240
time = 6796688 usecs


So it went from roughly 11 secs to 6 secs (one run even finished in 4 secs).
This shows that the patch cut the time spent in page faults almost in half.


-- Steve

/*
* Copyright 2012, Steven Rostedt
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <stdarg.h>
#include <sched.h>
#include <pthread.h>
#include <signal.h>
#include <time.h>
#include <sys/time.h>
#include <sys/mman.h>

/* Number of worker threads created by the test. */
#define THREADS 400
/* Small alternative mapping size: 4096 bytes per thread. */
//#define MEM (4096ULL*400)
/* Size of the mapping in bytes: 10 GiB (10737418240). */
#define MEM (10ULL*4096*1024*1024/4)

/*
 * Time-unit conversions. Every argument is parenthesized so that an
 * expression such as sec2usec(a + b) is converted as a whole instead of
 * only its last operand. Results are unsigned long long.
 */
#define nano2sec(nan) ((nan) / 1000000000ULL)
#define nano2ms(nan) ((nan) / 1000000ULL)
#define nano2usec(nan) ((nan) / 1000ULL)
#define usec2nano(usec) ((usec) * 1000ULL)
#define ms2nano(ms) ((ms) * 1000000ULL)
#define sec2nano(sec) ((sec) * 1000000000ULL)
#define sec2usec(sec) ((sec) * 1000000ULL)

/* Base address of the anonymous mapping read by the worker threads; assigned in main(). */
static char *data;

/*
 * Barriers shared by the worker threads and the thread that times the run.
 * start_barrier releases everyone into the read loop; stop_barrier is
 * crossed once every worker has finished its loop.
 */
static pthread_barrier_t start_barrier;
static pthread_barrier_t stop_barrier;

/*
 * perr - print a formatted message followed by the errno description, then exit
 * @fmt: printf-style format string
 * @...: arguments consumed by @fmt
 *
 * The message is formatted into a BUFSIZ-byte buffer (longer output is
 * truncated) and handed to perror(). The process then terminates with
 * exit status -1; this function does not return.
 */
static void perr(char *fmt, ...)
{
	va_list args;
	char msg[BUFSIZ];

	va_start(args, fmt);
	vsnprintf(msg, sizeof(msg), fmt, args);
	va_end(args);

	perror(msg);
	fflush(stderr);
	exit(-1);
}

/*
 * func - worker thread body: read one byte from each chunk owned by this thread
 * @dat: the worker's index, passed by value inside the pointer
 *
 * Waits on start_barrier, then walks the mapping in 4096-byte steps
 * (presumably the page size), starting at chunk @dat and advancing by
 * THREADS chunks each time. The stride is the compile-time THREADS value,
 * so the mapping is only fully covered when exactly THREADS workers run.
 * Finally waits on stop_barrier.
 *
 * Returns NULL.
 */
void *func(void *dat)
{
	unsigned long id = (unsigned long)dat;
	/*
	 * Read through a volatile pointer: every load must really be issued,
	 * so the optimizer cannot drop the accesses this test exists to make.
	 */
	const volatile char *mem = data;
	/* Wide enough for the 10 GiB bound even where unsigned long is 32 bits. */
	unsigned long long off;

	pthread_barrier_wait(&start_barrier);

	for (off = id * 4096ULL; off < MEM; off += 4096ULL * THREADS) {
		/*
		 * The value goes into a thread-local sink; a shared static
		 * would be written by all workers at once (a data race).
		 */
		char byte = mem[off];

		(void)byte;
	}

	pthread_barrier_wait(&stop_barrier);

	return NULL;
}

/*
 * get_time - sample the time of day as a single microsecond count
 *
 * Returns the seconds reported by gettimeofday() converted to
 * microseconds, plus the microsecond remainder.
 */
static unsigned long long get_time(void)
{
	struct timeval now;

	gettimeofday(&now, NULL);

	return sec2usec(now.tv_sec) + now.tv_usec;
}

/*
 * run_test - start the workers, time one full pass, and print the result
 * @threads: number of worker threads to create
 *
 * Creates @threads threads running func(), each given its index as the
 * argument. The clock is sampled before this thread joins start_barrier
 * and again after it passes stop_barrier, so the reported interval also
 * includes any wait for workers to reach the start barrier. Prints
 * "time = N usecs" to stdout, then joins every worker.
 *
 * Exits with status -1 if a thread cannot be created or joined.
 */
void run_test(int threads)
{
	pthread_t t[threads];
	unsigned long long start, end;
	/* Unsigned copy of the count, so the index comparison is not mixed-sign. */
	unsigned long nr = (unsigned long)threads;
	unsigned long i;
	int ret;

	for (i = 0; i < nr; i++) {
		/*
		 * pthread functions return the error number and leave errno
		 * alone, so report via strerror() rather than perror().
		 */
		ret = pthread_create(&t[i], NULL, func, (void *)i);
		if (ret) {
			fprintf(stderr, "pthread_create: %s\n", strerror(ret));
			exit(-1);
		}
	}

	start = get_time();
	pthread_barrier_wait(&start_barrier);
	pthread_barrier_wait(&stop_barrier);
	end = get_time();

	/* The difference is unsigned long long, hence %llu. */
	printf("time = %llu usecs\n", end - start);

	for (i = 0; i < nr; i++) {
		ret = pthread_join(t[i], NULL);
		if (ret) {
			fprintf(stderr, "pthread_join: %s\n", strerror(ret));
			exit(-1);
		}
	}
}

/*
 * main - set up the barriers and the mapping, then run the timed test
 * @argc: unused
 * @argv: unused
 *
 * Initializes both barriers for THREADS workers plus this thread, prints
 * the mapping size as "map=N", creates a read-only private anonymous
 * mapping of MEM bytes, and calls run_test().
 *
 * Returns 0 on success; exits with status -1 if barrier setup or mmap fails.
 */
int main(int argc, char **argv)
{
	int threads = THREADS;
	int ret;

	(void)argc;
	(void)argv;

	/*
	 * pthread_barrier_init() returns a positive error number on failure
	 * and does not set errno, so test for non-zero and decode the
	 * return value directly.
	 */
	ret = pthread_barrier_init(&start_barrier, NULL, threads + 1);
	if (ret) {
		fprintf(stderr, "pthread_barrier_init: %s\n", strerror(ret));
		exit(-1);
	}

	ret = pthread_barrier_init(&stop_barrier, NULL, threads + 1);
	if (ret) {
		fprintf(stderr, "pthread_barrier_init: %s\n", strerror(ret));
		exit(-1);
	}

	/* MEM is unsigned long long, hence %llu. */
	printf("map=%llu\n", MEM);
	data = mmap(NULL, MEM, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
	if (data == MAP_FAILED)
		perr("mmap");

	run_test(threads);

	return 0;
}