Re: string-486.h?

From: Artur Skawina (skawina@geocities.com)
Date: Sat Feb 26 2000 - 02:54:35 EST


Horst von Brand wrote:
>
> There was a great fight over that issue then, with people claiming that the
> kernel reimplementations were _much_ better than what gcc-2.7.2.3 gave.
> Maybe they are, but using 2.7.2.3 is just bowing to political correctness

OK, what are you proposing? Going even further than the attached patch from
those days? Can you make gcc 2.95 use more builtins, while also never falling
back to the out-of-line versions?

diff -urNp /img/linux-2.3.47pre3/include/asm-i386/string.h linux-2.3.47pre3as/include/asm-i386/string.h
--- /img/linux-2.3.47pre3/include/asm-i386/string.h Fri Feb 18 01:39:57 2000
+++ linux-2.3.47pre3as/include/asm-i386/string.h Fri Feb 18 04:33:42 2000
@@ -148,36 +148,43 @@ return __res;
 #define __HAVE_ARCH_STRCHR
 extern inline char * strchr(const char * s, int c)
 {
-int d0;
 register char * __res;
+/* leave the compiler some room to play with registers.
+ (also no move/shift of 'c' required)
+ */
 __asm__ __volatile__(
- "movb %%al,%%ah\n"
- "1:\tlodsb\n\t"
- "cmpb %%ah,%%al\n\t"
- "je 2f\n\t"
- "testb %%al,%%al\n\t"
- "jne 1b\n\t"
- "movl $1,%1\n"
- "2:\tmovl %1,%0\n\t"
- "decl %0"
- :"=a" (__res), "=&S" (d0) : "1" (s),"0" (c));
+ "1: movb (%1),%h0\n"
+ " cmpb %b0,%h0\n"
+ " je 2f\n"
+ " incl %1\n"
+ " testb %h0,%h0\n"
+ " jne 1b\n"
+ " xorl %1,%1\n"
+ "2:"
+ :"=&q" (c), "=&r" (__res)
+ :"0" ((u8)c), "1" (s));
 return __res;
 }
 
 #define __HAVE_ARCH_STRRCHR
 extern inline char * strrchr(const char * s, int c)
 {
-int d0, d1;
+int d0;
 register char * __res;
 __asm__ __volatile__(
- "movb %%al,%%ah\n"
- "1:\tlodsb\n\t"
- "cmpb %%ah,%%al\n\t"
- "jne 2f\n\t"
- "leal -1(%%esi),%0\n"
- "2:\ttestb %%al,%%al\n\t"
- "jne 1b"
- :"=g" (__res), "=&S" (d0), "=&a" (d1) :"0" (0),"1" (s),"2" (c));
+ "1: movb (%2),%h0\n\t"
+ " cmpb %b0,%h0\n\t"
+#if CPU!=686
+ " jne 2f\n\t"
+ " movl %2,%1\n"
+#else
+ " cmovnel %2,%1\n"
+#endif
+ "2: incl %2\n"
+ " testb %h0,%h0\n\t"
+ " jne 1b"
+ : "=&q" (c), "=&r" (__res), "=&r" (d0)
+ : "0" (c), "1" (0), "2" (s));
 return __res;
 }
 
@@ -191,24 +198,25 @@ __asm__ __volatile__(
         "scasb\n\t"
         "notl %0\n\t"
         "decl %0"
- :"=c" (__res), "=&D" (d0) :"1" (s),"a" (0), "0" (0xffffffff));
+ :"=&c" (__res), "=&D" (d0) :"1" (s),"a" (0), "0" (0xffffffff));
 return __res;
 }
+#define strlen __builtin_strlen
 
 extern inline void * __memcpy(void * to, const void * from, size_t n)
 {
-int d0, d1, d2;
+int d0, d1, d2, d3;
 __asm__ __volatile__(
+ "shrl $2, %0\n\t"
         "rep ; movsl\n\t"
- "testb $2,%b4\n\t"
- "je 1f\n\t"
+ "jnc 1f\n\t"
         "movsw\n"
- "1:\ttestb $1,%b4\n\t"
+ "1:\ttestb $1,%b3\n\t"
         "je 2f\n\t"
         "movsb\n"
         "2:"
- : "=&c" (d0), "=&D" (d1), "=&S" (d2)
- :"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from)
+ : "=&c" (d0), "=&D" (d1), "=&S" (d2), "=&q" (d3)
+ :"0" (n), "3" (n),"1" ((long) to),"2" ((long) from)
         : "memory");
 return (to);
 }
@@ -326,10 +334,18 @@ extern __inline__ void *__memcpy3d(void
  * No 3D Now!
  */
  
+#if (__GNUC__ > 2) || (__GNUC__ == 2 && __GNUC_MINOR__ >= 95)
+/* for gcc2.95+ the builtin is clearly better (even though it uses CLD) */
+#define memcpy(t, f, n) \
+(__builtin_constant_p(n) ? \
+ __builtin_memcpy((t),(f),(n)) : \
+ __memcpy((t),(f),(n)))
+#else
 #define memcpy(t, f, n) \
 (__builtin_constant_p(n) ? \
  __constant_memcpy((t),(f),(n)) : \
  __memcpy((t),(f),(n)))
+#endif
 
 #endif
 
@@ -475,6 +491,36 @@ extern inline void * __constant_c_and_co
                 case 4:
                         *(unsigned long *)s = pattern;
                         return s;
+ case 5:
+ *(unsigned long *)s = pattern;
+ *(4+(unsigned char *)s) = pattern;
+ return s;
+ case 6:
+ *(unsigned long *)s = pattern;
+ *(2+(unsigned short *)s) = pattern;
+ return s;
+ case 8:
+ *(unsigned long *)s = pattern;
+ *(1+(unsigned long *)s) = pattern;
+ return s;
+ case 12:
+ *(unsigned long *)s = pattern;
+ *(1+(unsigned long *)s) = pattern;
+ *(2+(unsigned long *)s) = pattern;
+ return s;
+ case 16:
+ *(unsigned long *)s = pattern;
+ *(1+(unsigned long *)s) = pattern;
+ *(2+(unsigned long *)s) = pattern;
+ *(3+(unsigned long *)s) = pattern;
+ return s;
+ case 20:
+ *(unsigned long *)s = pattern;
+ *(1+(unsigned long *)s) = pattern;
+ *(2+(unsigned long *)s) = pattern;
+ *(3+(unsigned long *)s) = pattern;
+ *(4+(unsigned long *)s) = pattern;
+ return s;
         }
 #define COMMON(x) \
 __asm__ __volatile__( \
@@ -496,10 +542,20 @@ __asm__ __volatile__( \
 #undef COMMON
 }
 
+#if (__GNUC__ > 2) || (__GNUC__ == 2 && __GNUC_MINOR__ >= 95)
+/* for gcc2.95+ use the builtin when c==0 */
+#define __constant_c_x_memset(s, c, count) \
+(__builtin_constant_p(count) ? \
+ ((c)!=0 ? \
+ __constant_c_and_count_memset((s),(c),(count)) : \
+ __builtin_memset((s),0,(count))) : \
+ __constant_c_memset((s),(c),(count)))
+#else
 #define __constant_c_x_memset(s, c, count) \
 (__builtin_constant_p(count) ? \
  __constant_c_and_count_memset((s),(c),(count)) : \
  __constant_c_memset((s),(c),(count)))
+#endif
 
 #define __memset(s, c, count) \
 (__builtin_constant_p(count) ? \

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Tue Feb 29 2000 - 21:00:15 EST