[PATCH] 386 asm cleanup

Colin Plumb (colin@nyx.net)
Sun, 19 Apr 1998 04:35:56 -0600 (MDT)


After going to all that trouble to *describe* how to do it right,
I figured I ought to submit a patch.

I reviewed every asm() in include/asm-i386 and corrected every problem
I saw. This compiles, but I haven't gotten around to rebooting yet. I
ask not for brave testers, but for another pair of eyes to desk-check
some of the bits, like <asm/smp_lock.h>, that I can't test at all.

In the meantime, I'll reboot and try it in practice and then submit
it for real testing.

Thanks.

(As an aside, it's awfully nice that I *do* have to make a deliberate
decision to reboot my machine.)

<asm/atomic.h> failed to require that an input/output operand use the
	same address for input and output: the inputs were separate
	"m" operands rather than matching "0" constraints, which
	struck me as wrong.
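	To illustrate the idiom with a toy example (mine, not the
	kernel code): a matching "0" constraint tells GCC the input
	must live in the same place as output operand 0, so both
	necessarily name the same address:

		/* Toy read-modify-write: the "0" ties the input
		 * to output operand 0. */
		static __inline__ void toy_incr(volatile int *p)
		{
			__asm__ __volatile__(
				"incl %0"
				:"=m" (*p)
				:"0" (*p));
		}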
<asm/bitops.h> had find_first_zero_bit requiring specific registers
	more than necessary, and had registers that were required as
	inputs also marked as clobbered, which is a no-no that recent
	GCCs complain about.
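	The fix is the usual dummy-output idiom; schematically (a
	simplified sketch of mine, not the real find_first_zero_bit):

		/* An asm that modifies %ecx and %edi: rather than
		 * listing input registers as clobbers, declare them
		 * as outputs into dummy variables so GCC knows they
		 * are changed. */
		static __inline__ void toy_scan(const char *s, int n)
		{
			int _d0, _d1;
			__asm__ __volatile__(
				"cld\n\t"
				"repne; scasb"
				:"=c" (_d0), "=D" (_d1)
				:"0" (n), "1" (s), "a" (0));
		}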
<asm/bugs.h> had an overspecific register requirement in no_387
and I'm dubious of check_popad. I left the existing behaviour
alone, but could someone please clarify what is going on?
	An Altavista search for "popad bug" will turn up a posting
	I made on the subject some time ago, quoting Intel errata
	which seem to indicate that the other version I included is
	the correct test. (Checking that it fetched the right word
	is just a little additional paranoia.)
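	For reference, my reading of the errata is that the problem
	sequence looks roughly like this (illustrative only):

		popad			# reloads all registers, %eax included
		movl (%eax,%ebx),%ecx	# next insn uses %eax as a base: can hang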
	Also, some K6 code failed to note that rdtsc clobbers %edx.
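	(rdtsc returns the 64-bit counter in %edx:%eax, so even a
	caller that only wants the low half must tell GCC about
	%edx. A minimal sketch:)

		/* Read the low 32 bits of the cycle counter; the
		 * high half lands in %edx whether we want it or
		 * not, so %edx goes in the clobber list. */
		static __inline__ unsigned long toy_rdtsc_low(void)
		{
			unsigned long low;
			__asm__ __volatile__("rdtsc" : "=a" (low) : : "dx");
			return low;
		}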
<asm/floppy.h> I generalized a lot of registers. Perhaps someone can
	check that I didn't overdo it, because there are in and out
	instructions that need the port number in %dx and the data
	in %eax/%ax/%al. I changed some inc/decs of %dx to alter
	%edx instead, which saves a byte and a cycle (incl %edx
	needs no 16-bit operand-size prefix).
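	For the record, the fixed-register cases look like this (a
	toy sketch of the pattern, not the driver code):

		/* Variable-port inb: the port *must* be in %dx and
		 * the result arrives in %al, so "d" and "a" cannot
		 * be generalized to "r". */
		static __inline__ unsigned char toy_inb(unsigned short port)
		{
			unsigned char v;
			__asm__ __volatile__("inb %w1,%b0"
					     :"=a" (v) :"d" (port));
			return v;
		}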
<asm/smp_lock.h> PLEASE CHECK THIS. I couldn't see how unlock_kernel
	could trash %eax, so I deleted it from the clobber list, but
	just maybe there's something subtle going on.
<asm/system.h> Lots of changes. As I described in my earlier mail.

-- 
	-Colin

diff -u old/atomic.h ./atomic.h
--- linux/include/asm-i386/atomic.h	Thu Dec  4 00:00:33 1997
+++ linux/include/asm-i386/atomic.h	Sat Apr 18 23:53:00 1998
@@ -35,7 +35,7 @@
 	__asm__ __volatile__(
 		LOCK "addl %1,%0"
 		:"=m" (__atomic_fool_gcc(v))
-		:"ir" (i), "m" (__atomic_fool_gcc(v)));
+		:"ir" (i), "0" (__atomic_fool_gcc(v)));
 }
 
 static __inline__ void atomic_sub(int i, volatile atomic_t *v)
@@ -43,7 +43,7 @@
 	__asm__ __volatile__(
 		LOCK "subl %1,%0"
 		:"=m" (__atomic_fool_gcc(v))
-		:"ir" (i), "m" (__atomic_fool_gcc(v)));
+		:"ir" (i), "0" (__atomic_fool_gcc(v)));
 }
 
 static __inline__ void atomic_inc(volatile atomic_t *v)
@@ -51,7 +51,7 @@
 	__asm__ __volatile__(
 		LOCK "incl %0"
 		:"=m" (__atomic_fool_gcc(v))
-		:"m" (__atomic_fool_gcc(v)));
+		:"0" (__atomic_fool_gcc(v)));
 }
 
 static __inline__ void atomic_dec(volatile atomic_t *v)
@@ -59,7 +59,7 @@
 	__asm__ __volatile__(
 		LOCK "decl %0"
 		:"=m" (__atomic_fool_gcc(v))
-		:"m" (__atomic_fool_gcc(v)));
+		:"0" (__atomic_fool_gcc(v)));
 }
 
 static __inline__ int atomic_dec_and_test(volatile atomic_t *v)
@@ -69,8 +69,8 @@
 	__asm__ __volatile__(
 		LOCK "decl %0; sete %1"
 		:"=m" (__atomic_fool_gcc(v)), "=qm" (c)
-		:"m" (__atomic_fool_gcc(v)));
-	return c != 0;
+		:"0" (__atomic_fool_gcc(v)));
+	return c & 1;
 }
 
 /* These are x86-specific, used by some header files */
diff -u old/bitops.h ./bitops.h
--- linux/include/asm-i386/bitops.h	Thu Dec  4 00:00:33 1997
+++ linux/include/asm-i386/bitops.h	Sun Apr 19 02:09:49 1998
@@ -128,23 +128,24 @@
 extern __inline__ int find_first_zero_bit(void * addr, unsigned size)
 {
 	int res;
+	int _dummy1, _dummy2;
 
 	if (!size)
 		return 0;
 	__asm__("cld\n\t"
 		"movl $-1,%%eax\n\t"
-		"xorl %%edx,%%edx\n\t"
+		"xorl %0,%0\n\t"
 		"repe; scasl\n\t"
 		"je 1f\n\t"
 		"xorl -4(%%edi),%%eax\n\t"
 		"subl $4,%%edi\n\t"
-		"bsfl %%eax,%%edx\n"
-		"1:\tsubl %%ebx,%%edi\n\t"
+		"bsfl %%eax,%0\n"
+		"1:\tsubl %5,%%edi\n\t"
 		"shll $3,%%edi\n\t"
-		"addl %%edi,%%edx"
-		:"=d" (res)
-		:"c" ((size + 31) >> 5), "D" (addr), "b" (addr)
-		:"ax", "cx", "di");
+		"addl %%edi,%0"
+		:"=r" (res), "=c" (_dummy1), "=D" (_dummy2)
+		:"c" ((size + 31) >> 5), "D" (addr), "r" (addr)
+		:"ax");
 	return res;
 }
 
diff -u old/bugs.h ./bugs.h
--- linux/include/asm-i386/bugs.h	Tue Apr 14 16:16:35 1998
+++ linux/include/asm-i386/bugs.h	Sun Apr 19 02:15:07 1998
@@ -23,10 +23,13 @@
 
 __initfunc(static void no_387(char *s, int *ints))
 {
+	int _dummy;
+
 	boot_cpu_data.hard_math = 0;
-	__asm__("movl %%cr0,%%eax\n\t"
-		"orl $0xE,%%eax\n\t"
-		"movl %%eax,%%cr0\n\t" : : : "ax");
+
+	__asm__("movl %%cr0,%0\n\t"
+		"orl $0xE,%0\n\t"
+		"movl %0,%%cr0\n\t" : "=r" (_dummy));
 }
 
 static char __initdata fpu_error = 0;
@@ -127,9 +130,17 @@
 
 /*
  * Most 386 processors have a bug where a POPAD can lock the
- * machine even from user space.
+ * machine even from user space.  We can't fix it, but an
+ * __initfunc to warn folks is a cheap convenience.
+ *
+ * FIXME: this test isn't checking for the POPAD bug that Intel
+ * errata describe, which requires that eax be used as a base
+ * register in the following instruction.  Will someone either fix
+ * this or document in this comment what the bug it's checking for
+ * actually is?  <colin@nyx.net>
  */
+#if 0
 __initfunc(static void check_popad(void))
 {
 #ifdef CONFIG_M386
@@ -137,15 +148,34 @@
 	printk(KERN_INFO "Checking for popad bug... ");
 	__asm__ __volatile__(
-	  "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx "
-	  : "=eax" (res)
-	  : "edx" (inp)
-	  : "eax", "ecx", "edx", "edi" );
+	  "pusha; popa; movl (%%edx,%%edi),%%ecx "
+	  : "=a" (res), "=d" (inp)
+	  : "0" (12345678), "1" (inp), "D" (0)
+	  : "ecx");
 	/* If this fails, it means that any user program may lock CPU hard. Too bad. */
 	if (res != 12345678) printk( "Buggy.\n" );
 	                else printk( "Ok.\n" );
 #endif
 }
+#else	/* What I think it should be - colin@nyx.net */
+__initfunc(static void check_popad(void))
+{
+#ifdef CONFIG_M386
+	unsigned const mem = 0xbadf00d;
+	unsigned eax;
+	unsigned dummy;
+
+	printk(KERN_INFO "Checking for popad bug... ");
+	__asm__("pusha; xorl %%eax,%%eax; popa; movl (%%eax,%3),%1"
+		: "=a" (eax),
+		  "=r" (dummy)
+		: "0" (12345678),
+		  "r" ((unsigned)&mem - 12345678));
+	/* If this fails, it means that any user program may lock CPU hard. Too bad. */
+	printk(eax == 12345678 && dummy == mem ? "Ok\n" : "Buggy.\n");
+#endif
+}
+#endif
 
 /*
  * B step AMD K6 before B 9730xxxx have hardware bugs that can cause
@@ -184,10 +214,10 @@
 
 	n = K6_BUG_LOOP;
 	f_vide = vide;
-	__asm__ ("rdtsc" : "=a" (d));
+	__asm__ ("rdtsc" : "=a" (d) : : "dx");
 	while (n--)
 		f_vide();
-	__asm__ ("rdtsc" : "=a" (d2));
+	__asm__ ("rdtsc" : "=a" (d2) : : "dx");
 	d = d2-d;
 
 	/* Knock these two lines out if it debugs out ok */
diff -u old/floppy.h ./floppy.h
--- linux/include/asm-i386/floppy.h	Tue Apr 14 16:22:16 1998
+++ linux/include/asm-i386/floppy.h	Sun Apr 19 02:19:57 1998
@@ -81,7 +81,7 @@
 		andb $160,%b0
 		cmpb $160,%b0
 		jne 2f
-		incw %w4
+		incl %4
 		testl %3,%3
 		jne 4f
 		inb %w4,%b0
@@ -89,7 +89,7 @@
 		jmp 5f
 4:		movb (%2),%0
 		outb %b0,%w4
-5:		decw %w4
+5:		decl %4
 		outb %0,$0x80
 		decl %1
 		incl %2
@@ -98,10 +98,10 @@
 3:		inb %w4,%b0
 2:		"
 		: "=a" ((char) st),
-		  "=c" ((long) virtual_dma_count),
-		  "=S" ((long) virtual_dma_addr)
-		: "b" ((long) virtual_dma_mode),
-		  "d" ((short) virtual_dma_port+4),
+		  "=r" ((long) virtual_dma_count),
+		  "=r" ((long) virtual_dma_addr)
+		: "r" ((long) virtual_dma_mode),
+		  "d" ((long) virtual_dma_port+4),
 		  "1" ((long) virtual_dma_count),
 		  "2" ((long) virtual_dma_addr));
 #else
Only in .: old
diff -u old/smp_lock.h ./smp_lock.h
--- linux/include/asm-i386/smp_lock.h	Sat Jan 31 00:29:56 1998
+++ linux/include/asm-i386/smp_lock.h	Sun Apr 19 02:16:49 1998
@@ -83,7 +83,7 @@
 		popfl
 		" : /* no outputs */
 		: "m" (current->lock_depth), "i" (NO_PROC_ID)
-		: "ax", "memory");
+		: "memory");
 }
 
 #endif /* __SMP__ */
diff -u old/system.h ./system.h
--- linux/include/asm-i386/system.h	Tue Apr 14 16:11:05 1998
+++ linux/include/asm-i386/system.h	Sun Apr 19 02:16:26 1998
@@ -29,10 +29,10 @@
 #define load_TR(n) __asm__ __volatile__("ltr %%ax": /* no output */ :"a" (_TSS(n)))
 #define load_ldt(n) __asm__ __volatile__("lldt %%ax": /* no output */ :"a" (_LDT(n)))
 #define store_TR(n) \
-__asm__("str %%ax\n\t" \
-	"subl %2,%%eax\n\t" \
-	"shrl $4,%%eax" \
-	:"=a" (n) \
+__asm__("str %w0\n\t" \
+	"subl %2,%0\n\t" \
+	"shrl $4,%0" \
+	:"=r" (n) \
 	:"0" (0),"i" (FIRST_TSS_ENTRY<<3))
 
 /* This special macro can be used to load a debugging register */
@@ -113,49 +113,51 @@
 } while (0)
 #endif
 
-#define _set_base(addr,base) \
-__asm__("movw %%dx,%0\n\t" \
-	"rorl $16,%%edx\n\t" \
-	"movb %%dl,%1\n\t" \
-	"movb %%dh,%2" \
-	: /* no output */ \
-	:"m" (*((addr)+2)), \
-	 "m" (*((addr)+4)), \
-	 "m" (*((addr)+7)), \
-	 "d" (base) \
-	:"dx")
-
-#define _set_limit(addr,limit) \
-__asm__("movw %%dx,%0\n\t" \
-	"rorl $16,%%edx\n\t" \
-	"movb %1,%%dh\n\t" \
-	"andb $0xf0,%%dh\n\t" \
-	"orb %%dh,%%dl\n\t" \
-	"movb %%dl,%1" \
-	: /* no output */ \
-	:"m" (*(addr)), \
-	 "m" (*((addr)+6)), \
-	 "d" (limit) \
-	:"dx")
+#define _set_base(addr,base) \
+do { int _dummy; \
+__asm__("movw %w0,%1\n\t" \
+	"rorl $16,%0\n\t" \
+	"movb %b0,%2\n\t" \
+	"movb %h0,%3" \
+	:"=q" (_dummy), \
+	 "=m" (*((addr)+2)), \
+	 "=m" (*((addr)+4)), \
+	 "=m" (*((addr)+7)) \
+	:"0" (base)); \
+} while (0)
+
+#define _set_limit(addr,limit) \
+do { int _dummy; \
+__asm__("movw %w0,%1\n\t" \
+	"rorl $16,%0\n\t" \
+	"movb %2,%h0\n\t" \
+	"andb $0xf0,%h0\n\t" \
+	"orb %h0,%b0\n\t" \
+	"movb %b0,%2" \
+	:"=q" (_dummy), \
+	 "=m" (*(addr)), \
+	 "=m" (*((addr)+6)) \
+	:"0" (limit)); \
+} while (0)
 
 #define set_base(ldt,base) _set_base( ((char *)&(ldt)) , (base) )
 #define set_limit(ldt,limit) _set_limit( ((char *)&(ldt)) , ((limit)-1)>>12 )
 
-static inline unsigned long _get_base(char * addr)
+static inline unsigned long _get_base(char const * addr)
 {
 	unsigned long __base;
-	__asm__("movb %3,%%dh\n\t"
-		"movb %2,%%dl\n\t"
-		"shll $16,%%edx\n\t"
-		"movw %1,%%dx"
-		:"=&d" (__base)
+	__asm__("movb %3,%h0\n\t"
+		"movb %2,%b0\n\t"
+		"shll $16,%0\n\t"
+		"movw %1,%w0"
+		:"=&q" (__base)
 		:"m" (*((addr)+2)),
 		 "m" (*((addr)+4)),
 		 "m" (*((addr)+7)));
 	return __base;
 }
 
-#define get_base(ldt) _get_base( ((char *)&(ldt)) )
+#define get_base(ldt) _get_base( ((char const *)&(ldt)) )
 
 static inline unsigned long get_limit(unsigned long segment)
 {
@@ -171,15 +173,14 @@
  * Clear and set 'TS' bit respectively
  */
 #define clts() __asm__ __volatile__ ("clts")
-#define stts() \
-__asm__ __volatile__ ( \
-	"movl %%cr0,%%eax\n\t" \
-	"orl $8,%%eax\n\t" \
-	"movl %%eax,%%cr0" \
-	: /* no outputs */ \
-	: /* no inputs */ \
-	:"ax")
-
+#define stts() \
+do { int _dummy; \
+__asm__ __volatile__ ( \
+	"movl %%cr0,%0\n\t" \
+	"orl $8,%0\n\t" \
+	"movl %0,%%cr0" \
+	: "=r" (_dummy)); \
+} while (0)
 
 #define xchg(ptr,x) ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr))))
 #define tas(ptr) (xchg((ptr),1))
@@ -251,16 +252,20 @@
 
 #endif
 
-#define _set_gate(gate_addr,type,dpl,addr) \
-__asm__ __volatile__ ("movw %%dx,%%ax\n\t" \
-	"movw %2,%%dx\n\t" \
-	"movl %%eax,%0\n\t" \
-	"movl %%edx,%1" \
-	:"=m" (*((long *) (gate_addr))), \
-	 "=m" (*(1+(long *) (gate_addr))) \
+#define _set_gate(gate_addr,type,dpl,addr) \
+do { int _dummy1, _dummy2; \
+__asm__ __volatile__ ("movw %w2,%w3\n\t" \
+	"movw %4,%w2\n\t" \
+	"movl %3,%0\n\t" \
+	"movl %2,%1" \
+	:"=m" (*((long *) (gate_addr))), \
+	 "=m" (*(1+(long *) (gate_addr))), \
+	 "=r" (_dummy1), \
+	 "=r" (_dummy2) \
 	:"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \
-	 "d" ((char *) (addr)),"a" (__KERNEL_CS << 16) \
-	:"ax","dx")
+	 "2" ((char *) (addr)), \
+	 "3" (__KERNEL_CS << 16)); \
+} while (0)
 
 #define set_intr_gate(n,addr) \
 	_set_gate(idt+(n),14,0,addr)
@@ -285,15 +290,21 @@
 		((limit) & 0x0ffff); }
 
 #define _set_tssldt_desc(n,addr,limit,type) \
-__asm__ __volatile__ ("movw %3,0(%2)\n\t" \
-	"movw %%ax,2(%2)\n\t" \
-	"rorl $16,%%eax\n\t" \
-	"movb %%al,4(%2)\n\t" \
-	"movb %4,5(%2)\n\t" \
-	"movb $0,6(%2)\n\t" \
-	"movb %%ah,7(%2)\n\t" \
-	"rorl $16,%%eax" \
-	: "=m"(*(n)) : "a" (addr), "r"(n), "i"(limit), "i"(type))
+__asm__ __volatile__ ("movw %w7,%0\n\t" \
+	"movw %w6,%1\n\t" \
+	"rorl $16,%6\n\t" \
+	"movb %b6,%2\n\t" \
+	"movb %b8,%3\n\t" \
+	"movb $0,%4\n\t" \
+	"movb %h6,%5\n\t" \
+	"rorl $16,%6" \
+	: "=m"(*(n)), \
+	  "=m"((n)[2]), \
+	  "=m"((n)[4]), \
+	  "=m"((n)[5]), \
+	  "=m"((n)[6]), \
+	  "=m"((n)[7]) \
+	: "a" (addr), "ir"(limit), "ir"(type))
 
 #define set_tss_desc(n,addr) \
 	_set_tssldt_desc(((char *) (n)),((int)(addr)),235,0x89)
