Re: .../asm-i386/bitops.h performance improvements

From: cutaway
Date: Wed Jun 15 2005 - 13:27:47 EST


----- Original Message -----
From: "Gene Heskett" <gene.heskett@xxxxxxxxxxx>
To: <linux-kernel@xxxxxxxxxxxxxxx>
> >
> To what cpu families does this apply? eg, this may be true for intel,
> but what about amd, via etc?

You tell me -- I've included below a small benchmark that compares them.

These are the results I've gotten so far:

CPU                   LEA       SHL/ADD
---------------------------------------
Pentium Pro 200       88sec     96sec
AMD K6/2-500          29sec     48sec
386SLC(386SX core)    2966sec   4932sec

If LEA isn't fast, those CPUs you mentioned have much bigger problems than
these two inline functions, because GCC always generates (with the kernel
default -O2 at least) an LEA for things like this:

/*
 * Multiply bar by nine using a shift and an add: (bar * 8) + bar.
 * The arithmetic is unsigned, so the result wraps on overflow.
 */
unsigned int foo(unsigned int bar)
{
	unsigned int times_eight = bar << 3;

	return times_eight + bar;
}

----------- LEA vs SHL/ADD ----------

#include <stdio.h>
#include <time.h>

/* Number of passes of the outer timing loop for each variant. */
#define ITERATIONS 2000000L

/*
 * Timing helpers.  They expect the caller to have time_t variables named
 * start, stop and delta, plus an integer loop counter named i, in scope.
 * START, STOP and SUMMARY each expand to exactly one statement and are
 * meant to be followed by a semicolon at the point of use, so they stay
 * safe inside an unbraced if/else.
 */

/* Record the wall-clock time (whole seconds) at which a run begins. */
#define START do { start = time(NULL); } while (0)

/* Record the end time and store the elapsed whole seconds in delta. */
#define STOP do { stop = time(NULL); delta = stop - start; } while (0)

/*
 * Print the elapsed time; s must be a string literal naming the test.
 * delta is cast because time_t is not guaranteed to be long.
 */
#define SUMMARY(s) printf(s " [%ld] seconds\n", (long)delta)

/* Loop header that runs the following statement ITERATIONS times. */
#define TESTLOOP for (i = 0; i < ITERATIONS; i++)

/*
 * One SHL/ADD step of the benchmark: shift %edi left by three bits, then
 * add it into %eax.  Only the cost of the instruction pair matters; no C
 * code reads the register contents back.
 *
 * The asm names %eax and %edi directly, so both are listed as clobbers,
 * together with the condition codes that SHL and ADD modify.  This keeps
 * the compiler from holding live values in them across the statement.
 * __volatile__ keeps the statement, which has no outputs, from being
 * discarded.
 */
static inline void shl(void)
{
	__asm__ __volatile__("shll $3,%%edi; addl %%edi,%%eax"
			     : /* no outputs */
			     : /* no inputs */
			     : "eax", "edi", "cc");
}

/*
 * One LEA step of the benchmark: compute %eax + %edi * 8 and store the
 * result in %eax with a single instruction.  Only the cost of the
 * instruction matters; no C code reads the register contents back.
 *
 * %eax is written directly by the asm, so it is listed as a clobber to
 * keep the compiler from holding a live value in it across the statement.
 * %edi is only read.  __volatile__ keeps the statement, which has no
 * outputs, from being discarded.
 */
static inline void lea(void)
{
	__asm__ __volatile__("leal (%%eax,%%edi,8),%%eax"
			     : /* no outputs */
			     : /* no inputs */
			     : "eax");
}


/*
 * Benchmark driver: time the SHL/ADD sequence, then the LEA sequence, and
 * print the elapsed wall-clock seconds for each.
 *
 * Every pass of TESTLOOP runs six T1000 groups, i.e. 6000 back-to-back
 * calls of the routine under test, so loop overhead is small next to the
 * instructions being measured.  Both variants run the same number of
 * calls, so the two printed times are directly comparable.
 *
 * Returns 0 unconditionally.
 */
int main(void)
{
	time_t start, stop, delta;
	int i;

/*
 * T is ten calls of the routine under test; T100 and T1000 are built from
 * it with exactly ten repetitions each, so the names match the counts.
 * Macro bodies are expanded at the point of use, which means redefining T
 * further down retargets T100 and T1000 without redefining them.
 */
#undef T
#define T shl();shl();shl();shl();shl();shl();shl();shl();shl();shl();
#define T100 T T T T T T T T T T
#define T1000 T100 T100 T100 T100 T100 T100 T100 T100 T100 T100

	START;
	TESTLOOP
	{
		/* Preserve the registers that the asm in shl() modifies. */
		__asm__ __volatile__("pushl %eax");
		__asm__ __volatile__("pushl %edi");
		T1000 T1000 T1000 T1000 T1000 T1000
		/* Restore in reverse order of the pushes. */
		__asm__ __volatile__("popl %edi");
		__asm__ __volatile__("popl %eax");
	}
	STOP;
	SUMMARY("SHL/ADD");

	/*---------------------------------------------------*/

/* Point T (and therefore T100/T1000) at the LEA variant. */
#undef T
#define T lea();lea();lea();lea();lea();lea();lea();lea();lea();lea();

	START;
	TESTLOOP
	{
		/* Same save/restore as above so both loops do equal extra work. */
		__asm__ __volatile__("pushl %eax");
		__asm__ __volatile__("pushl %edi");
		T1000 T1000 T1000 T1000 T1000 T1000
		__asm__ __volatile__("popl %edi");
		__asm__ __volatile__("popl %eax");
	}
	STOP;
	SUMMARY("LEA");

	return 0;
}


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/