Re: [PATCH 1/10] Cr4 is valid on some 486s

From: Gerd Knorr
Date: Mon Nov 14 2005 - 10:06:33 EST


Hi,

We _could_ nop out the actual conditional on the lock result for a spinlock, and turn

lock ; decb %0
js ...

into

nop ; decb %0
multi-byte-nop

Throwing another patch into the discussion ;)

Comes from some Xen guy. If I read the thing correctly, it builds an ELF section containing a table with both the SMP and UP versions of each code path, then patches in the one needed at runtime. It allows patching in both directions (UP->SMP, SMP->UP) at runtime, for hotplugging (virtual) CPUs. I'm not an inline asm expert though ...

Comments on that one?

Gerd

diff -Naur linux-2.6.12/arch/i386/Kconfig linux-2.6.12.post/arch/i386/Kconfig
--- linux-2.6.12/arch/i386/Kconfig 2005-06-17 15:48:29.000000000 -0400
+++ linux-2.6.12.post/arch/i386/Kconfig 2005-07-25 05:51:21.000000000 -0400
@@ -487,6 +487,19 @@

If you don't know what to do here, say N.

+config SMP_ALTERNATIVES
+ bool "SMP alternatives support (EXPERIMENTAL)"
+ depends on SMP && EXPERIMENTAL
+ help
+ Try to reduce the overhead of running an SMP kernel on a uniprocessor
+ host slightly by replacing certain key instruction sequences
+ according to whether we currently have more than one CPU available.
+ This should provide a noticeable boost to performance when
+ running SMP kernels on UP machines, and have negligible impact
+ when running on a true SMP host.
+
+ If unsure, say N.
+
config NR_CPUS
int "Maximum number of CPUs (2-255)"
range 2 255
diff -Naur linux-2.6.12/arch/i386/kernel/Makefile linux-2.6.12.post/arch/i386/kernel/Makefile
--- linux-2.6.12/arch/i386/kernel/Makefile 2005-06-17 15:48:29.000000000 -0400
+++ linux-2.6.12.post/arch/i386/kernel/Makefile 2005-07-25 05:51:21.000000000 -0400
@@ -33,6 +33,7 @@
obj-$(CONFIG_HPET_TIMER) += time_hpet.o
obj-$(CONFIG_EFI) += efi.o efi_stub.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
+obj-$(CONFIG_SMP_ALTERNATIVES) += smpalts.o

EXTRA_AFLAGS := -traditional

diff -Naur linux-2.6.12/arch/i386/kernel/smpalts.c linux-2.6.12.post/arch/i386/kernel/smpalts.c
--- linux-2.6.12/arch/i386/kernel/smpalts.c 1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.12.post/arch/i386/kernel/smpalts.c 2005-07-25 05:51:21.000000000 -0400
@@ -0,0 +1,85 @@
+#include <linux/kernel.h>
+#include <asm/system.h>
+#include <asm/smp_alt.h>
+#include <asm/processor.h>
+#include <asm/string.h>
+
+struct smp_replacement_record {
+ unsigned char targ_size;
+ unsigned char smp1_size;
+ unsigned char smp2_size;
+ unsigned char up_size;
+ unsigned char feature;
+ unsigned char data[0];
+};
+
+struct smp_alternative_record {
+ void *targ_start;
+ struct smp_replacement_record *repl;
+};
+
+extern struct smp_alternative_record __start_smp_alternatives_table,
+ __stop_smp_alternatives_table;
+extern unsigned long __init_begin, __init_end;
+
+void prepare_for_smp(void)
+{
+ struct smp_alternative_record *r;
+ printk(KERN_INFO "Enabling SMP...\n");
+ for (r = &__start_smp_alternatives_table;
+ r != &__stop_smp_alternatives_table;
+ r++) {
+ BUG_ON(r->repl->targ_size < r->repl->smp1_size);
+ BUG_ON(r->repl->targ_size < r->repl->smp2_size);
+ BUG_ON(r->repl->targ_size < r->repl->up_size);
+ if (system_state == SYSTEM_RUNNING &&
+ r->targ_start >= (void *)&__init_begin &&
+ r->targ_start < (void *)&__init_end)
+ continue;
+ if (r->repl->feature != (unsigned char)-1 &&
+ boot_cpu_has(r->repl->feature)) {
+ memcpy(r->targ_start,
+ r->repl->data + r->repl->smp1_size,
+ r->repl->smp2_size);
+ memset(r->targ_start + r->repl->smp2_size,
+ 0x90,
+ r->repl->targ_size - r->repl->smp2_size);
+ } else {
+ memcpy(r->targ_start,
+ r->repl->data,
+ r->repl->smp1_size);
+ memset(r->targ_start + r->repl->smp1_size,
+ 0x90,
+ r->repl->targ_size - r->repl->smp1_size);
+ }
+ }
+ /* Paranoia */
+ asm volatile ("jmp 1f\n1:");
+ mb();
+}
+
+void unprepare_for_smp(void)
+{
+ struct smp_alternative_record *r;
+ printk(KERN_INFO "Disabling SMP...\n");
+ for (r = &__start_smp_alternatives_table;
+ r != &__stop_smp_alternatives_table;
+ r++) {
+ BUG_ON(r->repl->targ_size < r->repl->smp1_size);
+ BUG_ON(r->repl->targ_size < r->repl->smp2_size);
+ BUG_ON(r->repl->targ_size < r->repl->up_size);
+ if (system_state == SYSTEM_RUNNING &&
+ r->targ_start >= (void *)&__init_begin &&
+ r->targ_start < (void *)&__init_end)
+ continue;
+ memcpy(r->targ_start,
+ r->repl->data + r->repl->smp1_size + r->repl->smp2_size,
+ r->repl->up_size);
+ memset(r->targ_start + r->repl->up_size,
+ 0x90,
+ r->repl->targ_size - r->repl->up_size);
+ }
+ /* Paranoia */
+ asm volatile ("jmp 1f\n1:");
+ mb();
+}
diff -Naur linux-2.6.12/arch/i386/kernel/smpboot.c linux-2.6.12.post/arch/i386/kernel/smpboot.c
--- linux-2.6.12/arch/i386/kernel/smpboot.c 2005-06-17 15:48:29.000000000 -0400
+++ linux-2.6.12.post/arch/i386/kernel/smpboot.c 2005-07-25 05:51:21.000000000 -0400
@@ -1001,6 +1001,11 @@
if (max_cpus <= cpucount+1)
continue;

+#ifdef CONFIG_SMP_ALTERNATIVES
+ if (kicked == 1)
+ prepare_for_smp();
+#endif
+
if (do_boot_cpu(apicid))
printk("CPU #%d not responding - cannot use it.\n",
apicid);
@@ -1130,6 +1135,11 @@
return -EIO;
}

+#ifdef CONFIG_SMP_ALTERNATIVES
+ if (num_online_cpus() == 1)
+ prepare_for_smp();
+#endif
+
local_irq_enable();
/* Unleash the CPU! */
cpu_set(cpu, smp_commenced_mask);
diff -Naur linux-2.6.12/arch/i386/kernel/vmlinux.lds.S linux-2.6.12.post/arch/i386/kernel/vmlinux.lds.S
--- linux-2.6.12/arch/i386/kernel/vmlinux.lds.S 2005-06-17 15:48:29.000000000 -0400
+++ linux-2.6.12.post/arch/i386/kernel/vmlinux.lds.S 2005-07-25 05:51:21.000000000 -0400
@@ -30,6 +30,13 @@
__ex_table : { *(__ex_table) }
__stop___ex_table = .;

+ . = ALIGN(16);
+ __start_smp_alternatives_table = .;
+ __smp_alternatives : { *(__smp_alternatives) }
+ __stop_smp_alternatives_table = .;
+
+ __smp_replacements : { *(__smp_replacements) }
+
RODATA

/* writeable */
diff -Naur linux-2.6.12/include/asm-i386/atomic.h linux-2.6.12.post/include/asm-i386/atomic.h
--- linux-2.6.12/include/asm-i386/atomic.h 2005-06-17 15:48:29.000000000 -0400
+++ linux-2.6.12.post/include/asm-i386/atomic.h 2005-07-25 05:51:21.000000000 -0400
@@ -4,18 +4,13 @@
#include <linux/config.h>
#include <linux/compiler.h>
#include <asm/processor.h>
+#include <asm/smp_alt.h>

/*
* Atomic operations that C can't guarantee us. Useful for
* resource counting etc..
*/

-#ifdef CONFIG_SMP
-#define LOCK "lock ; "
-#else
-#define LOCK ""
-#endif
-
/*
* Make sure gcc doesn't try to be clever and move things around
* on us. We need to use _exactly_ the address the user gave us,
diff -Naur linux-2.6.12/include/asm-i386/bitops.h linux-2.6.12.post/include/asm-i386/bitops.h
--- linux-2.6.12/include/asm-i386/bitops.h 2005-06-17 15:48:29.000000000 -0400
+++ linux-2.6.12.post/include/asm-i386/bitops.h 2005-07-25 05:51:21.000000000 -0400
@@ -7,6 +7,7 @@

#include <linux/config.h>
#include <linux/compiler.h>
+#include <asm/smp_alt.h>

/*
* These have to be done with inline assembly: that way the bit-setting
@@ -16,12 +17,6 @@
* bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
*/

-#ifdef CONFIG_SMP
-#define LOCK_PREFIX "lock ; "
-#else
-#define LOCK_PREFIX ""
-#endif
-
#define ADDR (*(volatile long *) addr)

/**
@@ -41,7 +36,7 @@
*/
static inline void set_bit(int nr, volatile unsigned long * addr)
{
- __asm__ __volatile__( LOCK_PREFIX
+ __asm__ __volatile__( LOCK
"btsl %1,%0"
:"=m" (ADDR)
:"Ir" (nr));
@@ -76,7 +71,7 @@
*/
static inline void clear_bit(int nr, volatile unsigned long * addr)
{
- __asm__ __volatile__( LOCK_PREFIX
+ __asm__ __volatile__( LOCK
"btrl %1,%0"
:"=m" (ADDR)
:"Ir" (nr));
@@ -121,7 +116,7 @@
*/
static inline void change_bit(int nr, volatile unsigned long * addr)
{
- __asm__ __volatile__( LOCK_PREFIX
+ __asm__ __volatile__( LOCK
"btcl %1,%0"
:"=m" (ADDR)
:"Ir" (nr));
@@ -140,7 +135,7 @@
{
int oldbit;

- __asm__ __volatile__( LOCK_PREFIX
+ __asm__ __volatile__( LOCK
"btsl %2,%1\n\tsbbl %0,%0"
:"=r" (oldbit),"=m" (ADDR)
:"Ir" (nr) : "memory");
@@ -180,7 +175,7 @@
{
int oldbit;

- __asm__ __volatile__( LOCK_PREFIX
+ __asm__ __volatile__( LOCK
"btrl %2,%1\n\tsbbl %0,%0"
:"=r" (oldbit),"=m" (ADDR)
:"Ir" (nr) : "memory");
@@ -231,7 +226,7 @@
{
int oldbit;

- __asm__ __volatile__( LOCK_PREFIX
+ __asm__ __volatile__( LOCK
"btcl %2,%1\n\tsbbl %0,%0"
:"=r" (oldbit),"=m" (ADDR)
:"Ir" (nr) : "memory");
diff -Naur linux-2.6.12/include/asm-i386/rwsem.h linux-2.6.12.post/include/asm-i386/rwsem.h
--- linux-2.6.12/include/asm-i386/rwsem.h 2005-06-17 15:48:29.000000000 -0400
+++ linux-2.6.12.post/include/asm-i386/rwsem.h 2005-07-25 05:51:21.000000000 -0400
@@ -40,6 +40,7 @@

#include <linux/list.h>
#include <linux/spinlock.h>
+#include <asm/smp_alt.h>

struct rwsem_waiter;

@@ -99,7 +100,7 @@
{
__asm__ __volatile__(
"# beginning down_read\n\t"
-LOCK_PREFIX " incl (%%eax)\n\t" /* adds 0x00000001, returns the old value */
+LOCK " incl (%%eax)\n\t" /* adds 0x00000001, returns the old value */
" js 2f\n\t" /* jump if we weren't granted the lock */
"1:\n\t"
LOCK_SECTION_START("")
@@ -130,7 +131,7 @@
" movl %1,%2\n\t"
" addl %3,%2\n\t"
" jle 2f\n\t"
-LOCK_PREFIX " cmpxchgl %2,%0\n\t"
+LOCK " cmpxchgl %2,%0\n\t"
" jnz 1b\n\t"
"2:\n\t"
"# ending __down_read_trylock\n\t"
@@ -150,7 +151,7 @@
tmp = RWSEM_ACTIVE_WRITE_BIAS;
__asm__ __volatile__(
"# beginning down_write\n\t"
-LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" /* subtract 0x0000ffff, returns the old value */
+LOCK " xadd %%edx,(%%eax)\n\t" /* subtract 0x0000ffff, returns the old value */
" testl %%edx,%%edx\n\t" /* was the count 0 before? */
" jnz 2f\n\t" /* jump if we weren't granted the lock */
"1:\n\t"
@@ -188,7 +189,7 @@
__s32 tmp = -RWSEM_ACTIVE_READ_BIAS;
__asm__ __volatile__(
"# beginning __up_read\n\t"
-LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" /* subtracts 1, returns the old value */
+LOCK " xadd %%edx,(%%eax)\n\t" /* subtracts 1, returns the old value */
" js 2f\n\t" /* jump if the lock is being waited upon */
"1:\n\t"
LOCK_SECTION_START("")
@@ -214,7 +215,7 @@
__asm__ __volatile__(
"# beginning __up_write\n\t"
" movl %2,%%edx\n\t"
-LOCK_PREFIX " xaddl %%edx,(%%eax)\n\t" /* tries to transition 0xffff0001 -> 0x00000000 */
+LOCK " xaddl %%edx,(%%eax)\n\t" /* tries to transition 0xffff0001 -> 0x00000000 */
" jnz 2f\n\t" /* jump if the lock is being waited upon */
"1:\n\t"
LOCK_SECTION_START("")
@@ -239,7 +240,7 @@
{
__asm__ __volatile__(
"# beginning __downgrade_write\n\t"
-LOCK_PREFIX " addl %2,(%%eax)\n\t" /* transitions 0xZZZZ0001 -> 0xYYYY0001 */
+LOCK " addl %2,(%%eax)\n\t" /* transitions 0xZZZZ0001 -> 0xYYYY0001 */
" js 2f\n\t" /* jump if the lock is being waited upon */
"1:\n\t"
LOCK_SECTION_START("")
@@ -263,7 +264,7 @@
static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem)
{
__asm__ __volatile__(
-LOCK_PREFIX "addl %1,%0"
+LOCK "addl %1,%0"
: "=m"(sem->count)
: "ir"(delta), "m"(sem->count));
}
@@ -276,7 +277,7 @@
int tmp = delta;

__asm__ __volatile__(
-LOCK_PREFIX "xadd %0,(%2)"
+LOCK "xadd %0,(%2)"
: "+r"(tmp), "=m"(sem->count)
: "r"(sem), "m"(sem->count)
: "memory");
diff -Naur linux-2.6.12/include/asm-i386/smp_alt.h linux-2.6.12.post/include/asm-i386/smp_alt.h
--- linux-2.6.12/include/asm-i386/smp_alt.h 1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.12.post/include/asm-i386/smp_alt.h 2005-07-25 05:51:21.000000000 -0400
@@ -0,0 +1,32 @@
+#ifndef __ASM_SMP_ALT_H__
+#define __ASM_SMP_ALT_H__
+
+#include <linux/config.h>
+
+#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP_ALTERNATIVES) && !defined(MODULE)
+#define LOCK \
+ "6677: nop\n" \
+ ".section __smp_alternatives,\"a\"\n" \
+ ".long 6677b\n" \
+ ".long 6678f\n" \
+ ".previous\n" \
+ ".section __smp_replacements,\"a\"\n" \
+ "6678: .byte 1\n" \
+ ".byte 1\n" \
+ ".byte 0\n" \
+ ".byte 1\n" \
+ ".byte -1\n" \
+ "lock\n" \
+ "nop\n" \
+ ".previous\n"
+void prepare_for_smp(void);
+void unprepare_for_smp(void);
+#else
+#define LOCK "lock ; "
+#endif
+#else
+#define LOCK ""
+#endif
+
+#endif /* __ASM_SMP_ALT_H__ */
diff -Naur linux-2.6.12/include/asm-i386/spinlock.h linux-2.6.12.post/include/asm-i386/spinlock.h
--- linux-2.6.12/include/asm-i386/spinlock.h 2005-06-17 15:48:29.000000000 -0400
+++ linux-2.6.12.post/include/asm-i386/spinlock.h 2005-07-25 05:51:21.000000000 -0400
@@ -6,6 +6,7 @@
#include <asm/page.h>
#include <linux/config.h>
#include <linux/compiler.h>
+#include <asm/smp_alt.h>

asmlinkage int printk(const char * fmt, ...)
__attribute__ ((format (printf, 1, 2)));
@@ -47,8 +48,9 @@
#define spin_unlock_wait(x) do { barrier(); } while(spin_is_locked(x))

#define spin_lock_string \
- "\n1:\t" \
- "lock ; decb %0\n\t" \
+ "1:\n" \
+ LOCK \
+ "decb %0\n\t" \
"jns 3f\n" \
"2:\t" \
"rep;nop\n\t" \
@@ -58,8 +60,9 @@
"3:\n\t"

#define spin_lock_string_flags \
- "\n1:\t" \
- "lock ; decb %0\n\t" \
+ "1:\n" \
+ LOCK \
+ "decb %0\n\t" \
"jns 4f\n\t" \
"2:\t" \
"testl $0x200, %1\n\t" \
@@ -121,10 +124,34 @@
static inline int _raw_spin_trylock(spinlock_t *lock)
{
char oldval;
+#ifdef CONFIG_SMP_ALTERNATIVES
__asm__ __volatile__(
- "xchgb %b0,%1"
+ "1:movb %1,%b0\n"
+ "movb $0,%1\n"
+ "2:"
+ ".section __smp_alternatives,\"a\"\n"
+ ".long 1b\n"
+ ".long 3f\n"
+ ".previous\n"
+ ".section __smp_replacements,\"a\"\n"
+ "3: .byte 2b - 1b\n"
+ ".byte 5f-4f\n"
+ ".byte 0\n"
+ ".byte 6f-5f\n"
+ ".byte -1\n"
+ "4: xchgb %b0,%1\n"
+ "5: movb %1,%b0\n"
+ "movb $0,%1\n"
+ "6:\n"
+ ".previous\n"
:"=q" (oldval), "=m" (lock->slock)
:"0" (0) : "memory");
+#else
+ __asm__ __volatile__(
+ "xchgb %b0,%1\n"
+ :"=q" (oldval), "=m" (lock->slock)
+ :"0" (0) : "memory");
+#endif
return oldval > 0;
}

@@ -225,8 +252,8 @@
__build_write_lock(rw, "__write_lock_failed");
}

-#define _raw_read_unlock(rw) asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
-#define _raw_write_unlock(rw) asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory")
+#define _raw_read_unlock(rw) asm volatile(LOCK "incl %0" :"=m" ((rw)->lock) : : "memory")
+#define _raw_write_unlock(rw) asm volatile(LOCK "addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory")

static inline int _raw_read_trylock(rwlock_t *lock)
{
diff -Naur linux-2.6.12/include/asm-i386/system.h linux-2.6.12.post/include/asm-i386/system.h
--- linux-2.6.12/include/asm-i386/system.h 2005-06-17 15:48:29.000000000 -0400
+++ linux-2.6.12.post/include/asm-i386/system.h 2005-07-25 05:51:21.000000000 -0400
@@ -5,7 +5,7 @@
#include <linux/kernel.h>
#include <asm/segment.h>
#include <asm/cpufeature.h>
-#include <linux/bitops.h> /* for LOCK_PREFIX */
+#include <asm/smp_alt.h>

#ifdef __KERNEL__

@@ -249,19 +249,19 @@
unsigned long prev;
switch (size) {
case 1:
- __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2"
+ __asm__ __volatile__(LOCK "cmpxchgb %b1,%2"
: "=a"(prev)
: "q"(new), "m"(*__xg(ptr)), "0"(old)
: "memory");
return prev;
case 2:
- __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2"
+ __asm__ __volatile__(LOCK "cmpxchgw %w1,%2"
: "=a"(prev)
: "q"(new), "m"(*__xg(ptr)), "0"(old)
: "memory");
return prev;
case 4:
- __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2"
+ __asm__ __volatile__(LOCK "cmpxchgl %1,%2"
: "=a"(prev)
: "q"(new), "m"(*__xg(ptr)), "0"(old)
: "memory");
@@ -425,11 +425,55 @@
#endif

#ifdef CONFIG_SMP
-#define smp_mb() mb()
-#define smp_rmb() rmb()
#define smp_wmb() wmb()
-#define smp_read_barrier_depends() read_barrier_depends()
+#if defined(CONFIG_SMP_ALTERNATIVES) && !defined(MODULE)
+#define smp_alt_mb(instr) \
+__asm__ __volatile__("6667:\nnop\nnop\nnop\nnop\nnop\nnop\n6668:\n" \
+ ".section __smp_alternatives,\"a\"\n" \
+ ".long 6667b\n" \
+ ".long 6673f\n" \
+ ".previous\n" \
+ ".section __smp_replacements,\"a\"\n" \
+ "6673:.byte 6668b-6667b\n" \
+ ".byte 6670f-6669f\n" \
+ ".byte 6671f-6670f\n" \
+ ".byte 0\n" \
+ ".byte %c0\n" \
+ "6669:lock;addl $0,0(%%esp)\n" \
+ "6670:" instr "\n" \
+ "6671:\n" \
+ ".previous\n" \
+ : \
+ : "i" (X86_FEATURE_XMM2) \
+ : "memory")
+#define smp_rmb() smp_alt_mb("lfence")
+#define smp_mb() smp_alt_mb("mfence")
+#define set_mb(var, value) do { \
+unsigned long __set_mb_temp; \
+__asm__ __volatile__("6667:movl %1, %0\n6668:\n" \
+ ".section __smp_alternatives,\"a\"\n" \
+ ".long 6667b\n" \
+ ".long 6673f\n" \
+ ".previous\n" \
+ ".section __smp_replacements,\"a\"\n" \
+ "6673: .byte 6668b-6667b\n" \
+ ".byte 6670f-6669f\n" \
+ ".byte 0\n" \
+ ".byte 6671f-6670f\n" \
+ ".byte -1\n" \
+ "6669: xchg %1, %0\n" \
+ "6670:movl %1, %0\n" \
+ "6671:\n" \
+ ".previous\n" \
+ : "=m" (var), "=r" (__set_mb_temp) \
+ : "1" (value) \
+ : "memory"); } while (0)
+#else
+#define smp_rmb() rmb()
+#define smp_mb() mb()
#define set_mb(var, value) do { xchg(&var, value); } while (0)
+#endif
+#define smp_read_barrier_depends() read_barrier_depends()
#else
#define smp_mb() barrier()
#define smp_rmb() barrier()