2.6.0 Huge pages not working as expected

From: Nick Craig-Wood
Date: Fri Dec 26 2003 - 05:56:01 EST


I've been trying out the huge page support using 2.6.0. I compiled
with :-

CONFIG_HUGETLBFS=y
CONFIG_HUGETLB_PAGE=y

And all looks good in /proc/meminfo

HugePages_Total: 8
HugePages_Free: 8
Hugepagesize: 4096 kB

I mounted a hugetlbfs on /mnt/hugetlb.

I wrote a little test program to show the benefits of huge pages by
reducing TLB thrashing - it fills up 16 MB with sequential numbers
then adds them with different strides - very much the sort of thing
FFTs do. However huge pages show a performance decrease not increase
for large strides! For smaller ones there is a small speedup.

I've been testing on

processor : 0
vendor_id : GenuineIntel
cpu family : 6
model : 8
model name : Pentium III (Coppermine)
stepping : 6
cpu MHz : 551.405
cache size : 256 KB
fdiv_bug : no
hlt_bug : no
f00f_bug : no
coma_bug : no
fpu : yes
fpu_exception : yes
cpuid level : 2
wp : yes
flags : fpu vme de pse tsc msr pae mce cx8 sep mtrr pge mca cmov pat pse36 mmx fxsr sse
bogomips : 1089.53

Whats happening? Is there something broken in my program, the kernel,
or my understanding? I know this isn't a particularly good
demonstration of reducing TLB thrashing as it should only read in
cacheline multiples, but I wasn't expecting it to slow down!

I've also been trying huge pages with mprime (which does lots of FFTs)
which does show some improvement (just 2% or so because it is already
very TLB aware).

Here are the results :-

------------------------------------------------------------
Memory from malloc()
Testing memory at 0x4015e008
span = 1, time = 71.212 ms, total = -2097152
span = 2, time = 71.744 ms, total = -2097152
span = 4, time = 88.352 ms, total = -2097152
span = 8, time = 176.207 ms, total = -2097152
span = 16, time = 176.166 ms, total = -2097152
span = 32, time = 176.385 ms, total = -2097152
span = 64, time = 179.042 ms, total = -2097152
span = 128, time = 184.059 ms, total = -2097152
span = 256, time = 195.014 ms, total = -2097152
span = 512, time = 217.084 ms, total = -2097152
span = 1024, time = 260.899 ms, total = -2097152
span = 2048, time = 259.714 ms, total = -2097152
span = 4096, time = 261.059 ms, total = -2097152

Memory from hugetlbfs
Testing memory at 0x41400000
span = 1, time = 70.815 ms, total = -2097152
span = 2, time = 71.261 ms, total = -2097152
span = 4, time = 88.178 ms, total = -2097152
span = 8, time = 175.512 ms, total = -2097152
span = 16, time = 174.996 ms, total = -2097152
span = 32, time = 175.689 ms, total = -2097152
span = 64, time = 177.301 ms, total = -2097152
span = 128, time = 181.705 ms, total = -2097152
span = 256, time = 191.232 ms, total = -2097152
span = 512, time = 209.886 ms, total = -2097152
span = 1024, time = 247.646 ms, total = -2097152
span = 2048, time = 279.525 ms, total = -2097152
span = 4096, time = 344.605 ms, total = -2097152

Memory from /dev/zero
Testing memory at 0x42400000
span = 1, time = 70.916 ms, total = -2097152
span = 2, time = 71.405 ms, total = -2097152
span = 4, time = 89.584 ms, total = -2097152
span = 8, time = 176.190 ms, total = -2097152
span = 16, time = 175.730 ms, total = -2097152
span = 32, time = 176.377 ms, total = -2097152
span = 64, time = 178.675 ms, total = -2097152
span = 128, time = 183.429 ms, total = -2097152
span = 256, time = 194.153 ms, total = -2097152
span = 512, time = 215.089 ms, total = -2097152
span = 1024, time = 256.428 ms, total = -2097152
span = 2048, time = 268.468 ms, total = -2097152
span = 4096, time = 268.702 ms, total = -2097152
------------------------------------------------------------

And here is the program...

#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/mman.h>

#define MEMORY_FILE_NAME "/mnt/hugetlb/z"
#define MEMORY_SIZE (4*1024*1024)

/****************************************************************************
Returns the time in floating point seconds since the epoch - useful for more
accurate timing that time() allows for
****************************************************************************/

static double timef(void)
{
struct timeval tv = {0, 0};
gettimeofday(&tv, 0);
return (double)tv.tv_sec + ((double)tv.tv_usec)/1E6;
}

/****************************************************************************
Test the memory with different spans - should show TLB thrashing nicely
****************************************************************************/

static void test(int *p)
{
int i;
int span;

printf("Testing memory at %p\n", p);

/* fill it */
for (i = 0; i < MEMORY_SIZE; i++)
p[i] = i;

/* test it with different spans */
for (span = 1; span <= 4096; span *= 2)
{
double start = timef();
int j;
int total = 0;

for (j = 0; j < span; j++)
{
for (i = j; i < MEMORY_SIZE; i+= span)
total += p[i];
}
start = timef() - start;
printf("span = %8d, time = %10.3f ms, total = %d\n", span, 1000*start, total);
}
printf("\n");
}

/****************************************************************************
Thrash the hugetlb
****************************************************************************/

int main(void)
{
int *malloc_memory;
int *hugepage_memory;
int *devzero_memory;
int fd;

/* get some malloc memory */
malloc_memory = calloc(MEMORY_SIZE, sizeof(int));
if (malloc_memory == 0)
{
fprintf(stderr, "Couldn't allocate memory\n");
exit(EXIT_FAILURE);
}

/* get some hugepage memory */
fd = open(MEMORY_FILE_NAME, O_CREAT|O_RDWR, 0600);
if (fd < 0)
{
fprintf(stderr, "Failed to open huge page memory file '%s': %s\n", MEMORY_FILE_NAME, strerror(errno));
exit(EXIT_FAILURE);
}
hugepage_memory = mmap(0, MEMORY_SIZE * sizeof(int), PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
if (hugepage_memory == MAP_FAILED)
{
fprintf(stderr, "Huge page mmap() failed: %s\n", strerror(errno));
exit(EXIT_FAILURE);
}

/* get some /dev/zero memory */
fd = open("/dev/zero", O_CREAT|O_RDWR, 0600);
if (fd < 0)
{
fprintf(stderr, "Failed to open /dev/zero memory file: %s\n", strerror(errno));
exit(EXIT_FAILURE);
}
devzero_memory = mmap(0, MEMORY_SIZE * sizeof(int), PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
if (devzero_memory == MAP_FAILED)
{
fprintf(stderr, "Huge page mmap() failed: %s\n", strerror(errno));
exit(EXIT_FAILURE);
}

printf("Memory from malloc()\n");
test(malloc_memory);

printf("Memory from hugetlbfs\n");
test(hugepage_memory);

printf("Memory from /dev/zero\n");
test(devzero_memory);

unlink(MEMORY_FILE_NAME);

return EXIT_SUCCESS;
}


--
Nick Craig-Wood
ncw1@xxxxxxxxxxxxxxxx
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/