alpha cmpxchg.h (was Re: [PATCH v2 cmpxchg 12/13] sh: Emulate one-byte cmpxchg)

From: Al Viro
Date: Thu May 02 2024 - 17:01:41 EST


On Thu, May 02, 2024 at 09:53:45PM +0100, Al Viro wrote:
> What's more, load-locked/store-conditional doesn't have 16-bit or 8-bit
> variants on any Alpha - it's always 32-bit (ldl_l) or 64-bit (ldq_l).
>
> What BWX adds is load/store byte/word, load/store byte/word unaligned,
> and sign-extend byte/word. IOW, it's absolutely irrelevant for
> cmpxchg (or xchg) purposes.
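
Concretely, since the only atomic primitives are quadword-wide, a one-byte
exchange has to load the containing aligned quadword, splice the new byte
in, and conditionally store the whole thing back - which is exactly what
the u8/u16 helpers in the patch below do. A stand-alone C sketch of that
read-modify-write (names are made up, and a GCC builtin compare-exchange
stands in for the ldq_l/stq_c pair; the real thing never leaves ll/sc):

#include <stdint.h>

static uint8_t byte_xchg_sketch(volatile uint8_t *p, uint8_t newval)
{
	uintptr_t a = (uintptr_t)p;
	/* aligned quadword containing the byte: the "andnot ...,7" step */
	volatile uint64_t *q = (volatile uint64_t *)(a & ~(uintptr_t)7);
	unsigned int shift = (unsigned int)(a & 7) * 8;	/* byte lane */
	uint64_t old = *q, repl;

	do {
		repl = (old & ~((uint64_t)0xff << shift))	/* mskbl */
		     | ((uint64_t)newval << shift);		/* insbl + or */
		/* stq_c equivalent; on failure old is reloaded and we retry */
	} while (!__atomic_compare_exchange_n(q, &old, repl, 1,
					      __ATOMIC_RELAXED, __ATOMIC_RELAXED));

	return (uint8_t)((old >> shift) & 0xff);	/* extbl */
}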

FWIW, I do have a cmpxchg-related patch for alpha - the mess with xchg.h
(parametrized double include) is no longer needed, and hasn't been since
2018 (fbfcd0199170 "locking/xchg/alpha: Remove superfluous memory barriers
from the _local() variants" was the point when things settled down).
Only tangentially related to your stuff, but it makes the damn thing
easier to follow.
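
For anyone who hasn't met the trick being removed: asm/cmpxchg.h defined a
name-pasting macro and then pulled asm/xchg.h in twice, so a single set of
function bodies got stamped out under two sets of names. A condensed
stand-alone model of the mechanism (invented names; a macro stands in for
the second file):

#include <stdio.h>

/* stand-in for the shared bodies that used to live in asm/xchg.h */
#define DEFINE_HELPERS \
static int NAME(_add)(int a, int b) { return a + b; }

#define NAME(suffix) helper ## suffix ## _local
DEFINE_HELPERS			/* emits helper_add_local() */
#undef NAME
#define NAME(suffix) helper ## suffix
DEFINE_HELPERS			/* same body again, as helper_add() */

int main(void)
{
	printf("%d %d\n", helper_add_local(1, 2), helper_add(3, 4));
	return 0;
}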


commit e992b5436ccd504b07a390118cf2be686355b957
Author: Al Viro <viro@xxxxxxxxxxxxxxxxxx>
Date: Mon Apr 8 17:43:37 2024 -0400

alpha: no need to include asm/xchg.h twice

We used to generate different helpers for local and full
{cmp,}xchg(); these days the barriers are in arch_{cmp,}xchg()
instead, and the generated helpers are identical for the local
and full cases. No need for those parametrized includes of
asm/xchg.h - we might as well insert its contents directly
in asm/cmpxchg.h and do it only once.

Signed-off-by: Al Viro <viro@xxxxxxxxxxxxxxxxxx>

diff --git a/arch/alpha/include/asm/cmpxchg.h b/arch/alpha/include/asm/cmpxchg.h
index 91d4a4d9258c..ae1b96479d0c 100644
--- a/arch/alpha/include/asm/cmpxchg.h
+++ b/arch/alpha/include/asm/cmpxchg.h
@@ -3,17 +3,232 @@
#define _ALPHA_CMPXCHG_H

/*
- * Atomic exchange routines.
+ * Atomic exchange.
+ * Since it can be used to implement critical sections
+ * it must clobber "memory" (also for interrupts in UP).
*/

-#define ____xchg(type, args...) __arch_xchg ## type ## _local(args)
-#define ____cmpxchg(type, args...) __cmpxchg ## type ## _local(args)
-#include <asm/xchg.h>
+static inline unsigned long
+____xchg_u8(volatile char *m, unsigned long val)
+{
+ unsigned long ret, tmp, addr64;
+
+ __asm__ __volatile__(
+ " andnot %4,7,%3\n"
+ " insbl %1,%4,%1\n"
+ "1: ldq_l %2,0(%3)\n"
+ " extbl %2,%4,%0\n"
+ " mskbl %2,%4,%2\n"
+ " or %1,%2,%2\n"
+ " stq_c %2,0(%3)\n"
+ " beq %2,2f\n"
+ ".subsection 2\n"
+ "2: br 1b\n"
+ ".previous"
+ : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64)
+ : "r" ((long)m), "1" (val) : "memory");
+
+ return ret;
+}
+
+static inline unsigned long
+____xchg_u16(volatile short *m, unsigned long val)
+{
+ unsigned long ret, tmp, addr64;
+
+ __asm__ __volatile__(
+ " andnot %4,7,%3\n"
+ " inswl %1,%4,%1\n"
+ "1: ldq_l %2,0(%3)\n"
+ " extwl %2,%4,%0\n"
+ " mskwl %2,%4,%2\n"
+ " or %1,%2,%2\n"
+ " stq_c %2,0(%3)\n"
+ " beq %2,2f\n"
+ ".subsection 2\n"
+ "2: br 1b\n"
+ ".previous"
+ : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64)
+ : "r" ((long)m), "1" (val) : "memory");
+
+ return ret;
+}
+
+static inline unsigned long
+____xchg_u32(volatile int *m, unsigned long val)
+{
+ unsigned long dummy;
+
+ __asm__ __volatile__(
+ "1: ldl_l %0,%4\n"
+ " bis $31,%3,%1\n"
+ " stl_c %1,%2\n"
+ " beq %1,2f\n"
+ ".subsection 2\n"
+ "2: br 1b\n"
+ ".previous"
+ : "=&r" (val), "=&r" (dummy), "=m" (*m)
+ : "rI" (val), "m" (*m) : "memory");
+
+ return val;
+}
+
+static inline unsigned long
+____xchg_u64(volatile long *m, unsigned long val)
+{
+ unsigned long dummy;
+
+ __asm__ __volatile__(
+ "1: ldq_l %0,%4\n"
+ " bis $31,%3,%1\n"
+ " stq_c %1,%2\n"
+ " beq %1,2f\n"
+ ".subsection 2\n"
+ "2: br 1b\n"
+ ".previous"
+ : "=&r" (val), "=&r" (dummy), "=m" (*m)
+ : "rI" (val), "m" (*m) : "memory");
+
+ return val;
+}
+
+/* This function doesn't exist, so you'll get a linker error
+ if something tries to do an invalid xchg(). */
+extern void __xchg_called_with_bad_pointer(void);
+
+static __always_inline unsigned long
+____xchg(volatile void *ptr, unsigned long x, int size)
+{
+ return
+ size == 1 ? ____xchg_u8(ptr, x) :
+ size == 2 ? ____xchg_u16(ptr, x) :
+ size == 4 ? ____xchg_u32(ptr, x) :
+ size == 8 ? ____xchg_u64(ptr, x) :
+ (__xchg_called_with_bad_pointer(), x);
+}
+
+/*
+ * Atomic compare and exchange. Compare OLD with MEM, if identical,
+ * store NEW in MEM. Return the initial value in MEM. Success is
+ * indicated by comparing RETURN with OLD.
+ */
+
+static inline unsigned long
+____cmpxchg_u8(volatile char *m, unsigned char old, unsigned char new)
+{
+ unsigned long prev, tmp, cmp, addr64;
+
+ __asm__ __volatile__(
+ " andnot %5,7,%4\n"
+ " insbl %1,%5,%1\n"
+ "1: ldq_l %2,0(%4)\n"
+ " extbl %2,%5,%0\n"
+ " cmpeq %0,%6,%3\n"
+ " beq %3,2f\n"
+ " mskbl %2,%5,%2\n"
+ " or %1,%2,%2\n"
+ " stq_c %2,0(%4)\n"
+ " beq %2,3f\n"
+ "2:\n"
+ ".subsection 2\n"
+ "3: br 1b\n"
+ ".previous"
+ : "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64)
+ : "r" ((long)m), "Ir" (old), "1" (new) : "memory");
+
+ return prev;
+}
+
+static inline unsigned long
+____cmpxchg_u16(volatile short *m, unsigned short old, unsigned short new)
+{
+ unsigned long prev, tmp, cmp, addr64;
+
+ __asm__ __volatile__(
+ " andnot %5,7,%4\n"
+ " inswl %1,%5,%1\n"
+ "1: ldq_l %2,0(%4)\n"
+ " extwl %2,%5,%0\n"
+ " cmpeq %0,%6,%3\n"
+ " beq %3,2f\n"
+ " mskwl %2,%5,%2\n"
+ " or %1,%2,%2\n"
+ " stq_c %2,0(%4)\n"
+ " beq %2,3f\n"
+ "2:\n"
+ ".subsection 2\n"
+ "3: br 1b\n"
+ ".previous"
+ : "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64)
+ : "r" ((long)m), "Ir" (old), "1" (new) : "memory");
+
+ return prev;
+}
+
+static inline unsigned long
+____cmpxchg_u32(volatile int *m, int old, int new)
+{
+ unsigned long prev, cmp;
+
+ __asm__ __volatile__(
+ "1: ldl_l %0,%5\n"
+ " cmpeq %0,%3,%1\n"
+ " beq %1,2f\n"
+ " mov %4,%1\n"
+ " stl_c %1,%2\n"
+ " beq %1,3f\n"
+ "2:\n"
+ ".subsection 2\n"
+ "3: br 1b\n"
+ ".previous"
+ : "=&r"(prev), "=&r"(cmp), "=m"(*m)
+ : "r"((long) old), "r"(new), "m"(*m) : "memory");
+
+ return prev;
+}
+
+static inline unsigned long
+____cmpxchg_u64(volatile long *m, unsigned long old, unsigned long new)
+{
+ unsigned long prev, cmp;
+
+ __asm__ __volatile__(
+ "1: ldq_l %0,%5\n"
+ " cmpeq %0,%3,%1\n"
+ " beq %1,2f\n"
+ " mov %4,%1\n"
+ " stq_c %1,%2\n"
+ " beq %1,3f\n"
+ "2:\n"
+ ".subsection 2\n"
+ "3: br 1b\n"
+ ".previous"
+ : "=&r"(prev), "=&r"(cmp), "=m"(*m)
+ : "r"((long) old), "r"(new), "m"(*m) : "memory");
+
+ return prev;
+}
+
+/* This function doesn't exist, so you'll get a linker error
+ if something tries to do an invalid cmpxchg(). */
+extern void __cmpxchg_called_with_bad_pointer(void);
+
+static __always_inline unsigned long
+____cmpxchg(volatile void *ptr, unsigned long old, unsigned long new,
+ int size)
+{
+ return
+ size == 1 ? ____cmpxchg_u8(ptr, old, new) :
+ size == 2 ? ____cmpxchg_u16(ptr, old, new) :
+ size == 4 ? ____cmpxchg_u32(ptr, old, new) :
+ size == 8 ? ____cmpxchg_u64(ptr, old, new) :
+ (__cmpxchg_called_with_bad_pointer(), old);
+}

#define xchg_local(ptr, x) \
({ \
__typeof__(*(ptr)) _x_ = (x); \
- (__typeof__(*(ptr))) __arch_xchg_local((ptr), (unsigned long)_x_,\
+ (__typeof__(*(ptr))) ____xchg((ptr), (unsigned long)_x_, \
sizeof(*(ptr))); \
})

@@ -21,7 +236,7 @@
({ \
__typeof__(*(ptr)) _o_ = (o); \
__typeof__(*(ptr)) _n_ = (n); \
- (__typeof__(*(ptr))) __cmpxchg_local((ptr), (unsigned long)_o_, \
+ (__typeof__(*(ptr))) ____cmpxchg((ptr), (unsigned long)_o_, \
(unsigned long)_n_, \
sizeof(*(ptr))); \
})
@@ -32,12 +247,6 @@
cmpxchg_local((ptr), (o), (n)); \
})

-#undef ____xchg
-#undef ____cmpxchg
-#define ____xchg(type, args...) __arch_xchg ##type(args)
-#define ____cmpxchg(type, args...) __cmpxchg ##type(args)
-#include <asm/xchg.h>
-
/*
* The leading and the trailing memory barriers guarantee that these
* operations are fully ordered.
@@ -48,7 +257,7 @@
__typeof__(*(ptr)) _x_ = (x); \
smp_mb(); \
__ret = (__typeof__(*(ptr))) \
- __arch_xchg((ptr), (unsigned long)_x_, sizeof(*(ptr))); \
+ ____xchg((ptr), (unsigned long)_x_, sizeof(*(ptr))); \
smp_mb(); \
__ret; \
})
@@ -59,7 +268,7 @@
__typeof__(*(ptr)) _o_ = (o); \
__typeof__(*(ptr)) _n_ = (n); \
smp_mb(); \
- __ret = (__typeof__(*(ptr))) __cmpxchg((ptr), \
+ __ret = (__typeof__(*(ptr))) ____cmpxchg((ptr), \
(unsigned long)_o_, (unsigned long)_n_, sizeof(*(ptr)));\
smp_mb(); \
__ret; \
@@ -71,6 +280,4 @@
arch_cmpxchg((ptr), (o), (n)); \
})

-#undef ____cmpxchg
-
#endif /* _ALPHA_CMPXCHG_H */
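
An aside on the "Atomic compare and exchange" comment above: "success is
indicated by comparing RETURN with OLD" implies the usual consumer pattern,
a fetch/compute/cmpxchg retry loop. A minimal stand-alone illustration,
written against C11 atomics rather than the kernel macros:

#include <stdatomic.h>
#include <stdio.h>

/* saturating increment built from compare-and-exchange alone --
 * the same retry structure a cmpxchg() caller uses */
static int saturating_inc(_Atomic int *v, int limit)
{
	int old = atomic_load(v);

	for (;;) {
		if (old >= limit)
			return old;		/* nothing to do */
		/* on failure, old is refreshed with the current value */
		if (atomic_compare_exchange_weak(v, &old, old + 1))
			return old;		/* we won the race */
	}
}

int main(void)
{
	_Atomic int v = 41;

	printf("%d -> %d\n", saturating_inc(&v, 100), atomic_load(&v));
	return 0;
}
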
diff --git a/arch/alpha/include/asm/xchg.h b/arch/alpha/include/asm/xchg.h
deleted file mode 100644
index 7adb80c6746a..000000000000
--- a/arch/alpha/include/asm/xchg.h
+++ /dev/null
@@ -1,246 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ALPHA_CMPXCHG_H
-#error Do not include xchg.h directly!
-#else
-/*
- * xchg/xchg_local and cmpxchg/cmpxchg_local share the same code
- * except that local version do not have the expensive memory barrier.
- * So this file is included twice from asm/cmpxchg.h.
- */
-
-/*
- * Atomic exchange.
- * Since it can be used to implement critical sections
- * it must clobber "memory" (also for interrupts in UP).
- */
-
-static inline unsigned long
-____xchg(_u8, volatile char *m, unsigned long val)
-{
- unsigned long ret, tmp, addr64;
-
- __asm__ __volatile__(
- " andnot %4,7,%3\n"
- " insbl %1,%4,%1\n"
- "1: ldq_l %2,0(%3)\n"
- " extbl %2,%4,%0\n"
- " mskbl %2,%4,%2\n"
- " or %1,%2,%2\n"
- " stq_c %2,0(%3)\n"
- " beq %2,2f\n"
- ".subsection 2\n"
- "2: br 1b\n"
- ".previous"
- : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64)
- : "r" ((long)m), "1" (val) : "memory");
-
- return ret;
-}
-
-static inline unsigned long
-____xchg(_u16, volatile short *m, unsigned long val)
-{
- unsigned long ret, tmp, addr64;
-
- __asm__ __volatile__(
- " andnot %4,7,%3\n"
- " inswl %1,%4,%1\n"
- "1: ldq_l %2,0(%3)\n"
- " extwl %2,%4,%0\n"
- " mskwl %2,%4,%2\n"
- " or %1,%2,%2\n"
- " stq_c %2,0(%3)\n"
- " beq %2,2f\n"
- ".subsection 2\n"
- "2: br 1b\n"
- ".previous"
- : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64)
- : "r" ((long)m), "1" (val) : "memory");
-
- return ret;
-}
-
-static inline unsigned long
-____xchg(_u32, volatile int *m, unsigned long val)
-{
- unsigned long dummy;
-
- __asm__ __volatile__(
- "1: ldl_l %0,%4\n"
- " bis $31,%3,%1\n"
- " stl_c %1,%2\n"
- " beq %1,2f\n"
- ".subsection 2\n"
- "2: br 1b\n"
- ".previous"
- : "=&r" (val), "=&r" (dummy), "=m" (*m)
- : "rI" (val), "m" (*m) : "memory");
-
- return val;
-}
-
-static inline unsigned long
-____xchg(_u64, volatile long *m, unsigned long val)
-{
- unsigned long dummy;
-
- __asm__ __volatile__(
- "1: ldq_l %0,%4\n"
- " bis $31,%3,%1\n"
- " stq_c %1,%2\n"
- " beq %1,2f\n"
- ".subsection 2\n"
- "2: br 1b\n"
- ".previous"
- : "=&r" (val), "=&r" (dummy), "=m" (*m)
- : "rI" (val), "m" (*m) : "memory");
-
- return val;
-}
-
-/* This function doesn't exist, so you'll get a linker error
- if something tries to do an invalid xchg(). */
-extern void __xchg_called_with_bad_pointer(void);
-
-static __always_inline unsigned long
-____xchg(, volatile void *ptr, unsigned long x, int size)
-{
- switch (size) {
- case 1:
- return ____xchg(_u8, ptr, x);
- case 2:
- return ____xchg(_u16, ptr, x);
- case 4:
- return ____xchg(_u32, ptr, x);
- case 8:
- return ____xchg(_u64, ptr, x);
- }
- __xchg_called_with_bad_pointer();
- return x;
-}
-
-/*
- * Atomic compare and exchange. Compare OLD with MEM, if identical,
- * store NEW in MEM. Return the initial value in MEM. Success is
- * indicated by comparing RETURN with OLD.
- */
-
-static inline unsigned long
-____cmpxchg(_u8, volatile char *m, unsigned char old, unsigned char new)
-{
- unsigned long prev, tmp, cmp, addr64;
-
- __asm__ __volatile__(
- " andnot %5,7,%4\n"
- " insbl %1,%5,%1\n"
- "1: ldq_l %2,0(%4)\n"
- " extbl %2,%5,%0\n"
- " cmpeq %0,%6,%3\n"
- " beq %3,2f\n"
- " mskbl %2,%5,%2\n"
- " or %1,%2,%2\n"
- " stq_c %2,0(%4)\n"
- " beq %2,3f\n"
- "2:\n"
- ".subsection 2\n"
- "3: br 1b\n"
- ".previous"
- : "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64)
- : "r" ((long)m), "Ir" (old), "1" (new) : "memory");
-
- return prev;
-}
-
-static inline unsigned long
-____cmpxchg(_u16, volatile short *m, unsigned short old, unsigned short new)
-{
- unsigned long prev, tmp, cmp, addr64;
-
- __asm__ __volatile__(
- " andnot %5,7,%4\n"
- " inswl %1,%5,%1\n"
- "1: ldq_l %2,0(%4)\n"
- " extwl %2,%5,%0\n"
- " cmpeq %0,%6,%3\n"
- " beq %3,2f\n"
- " mskwl %2,%5,%2\n"
- " or %1,%2,%2\n"
- " stq_c %2,0(%4)\n"
- " beq %2,3f\n"
- "2:\n"
- ".subsection 2\n"
- "3: br 1b\n"
- ".previous"
- : "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64)
- : "r" ((long)m), "Ir" (old), "1" (new) : "memory");
-
- return prev;
-}
-
-static inline unsigned long
-____cmpxchg(_u32, volatile int *m, int old, int new)
-{
- unsigned long prev, cmp;
-
- __asm__ __volatile__(
- "1: ldl_l %0,%5\n"
- " cmpeq %0,%3,%1\n"
- " beq %1,2f\n"
- " mov %4,%1\n"
- " stl_c %1,%2\n"
- " beq %1,3f\n"
- "2:\n"
- ".subsection 2\n"
- "3: br 1b\n"
- ".previous"
- : "=&r"(prev), "=&r"(cmp), "=m"(*m)
- : "r"((long) old), "r"(new), "m"(*m) : "memory");
-
- return prev;
-}
-
-static inline unsigned long
-____cmpxchg(_u64, volatile long *m, unsigned long old, unsigned long new)
-{
- unsigned long prev, cmp;
-
- __asm__ __volatile__(
- "1: ldq_l %0,%5\n"
- " cmpeq %0,%3,%1\n"
- " beq %1,2f\n"
- " mov %4,%1\n"
- " stq_c %1,%2\n"
- " beq %1,3f\n"
- "2:\n"
- ".subsection 2\n"
- "3: br 1b\n"
- ".previous"
- : "=&r"(prev), "=&r"(cmp), "=m"(*m)
- : "r"((long) old), "r"(new), "m"(*m) : "memory");
-
- return prev;
-}
-
-/* This function doesn't exist, so you'll get a linker error
- if something tries to do an invalid cmpxchg(). */
-extern void __cmpxchg_called_with_bad_pointer(void);
-
-static __always_inline unsigned long
-____cmpxchg(, volatile void *ptr, unsigned long old, unsigned long new,
- int size)
-{
- switch (size) {
- case 1:
- return ____cmpxchg(_u8, ptr, old, new);
- case 2:
- return ____cmpxchg(_u16, ptr, old, new);
- case 4:
- return ____cmpxchg(_u32, ptr, old, new);
- case 8:
- return ____cmpxchg(_u64, ptr, old, new);
- }
- __cmpxchg_called_with_bad_pointer();
- return old;
-}
-
-#endif
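
One last note on the ____xchg()/____cmpxchg() dispatchers: sizeof() is a
compile-time constant, so for any supported operand the ternary chain folds
to a single direct call and the reference to the deliberately-undefined
__{,cmp}xchg_called_with_bad_pointer() disappears; an unsupported size
survives folding and fails at link time instead of at run time. A
stand-alone model of the pattern (names invented; like the kernel, it
relies on the compiler folding the constant conditions, which -O2 does):

#include <stdio.h>

/* deliberately never defined: referencing it breaks the link */
extern unsigned long fetch_called_with_bad_size(void);

static unsigned long fetch_u32(volatile void *p)
{ return *(volatile unsigned int *)p; }

static unsigned long fetch_u64(volatile void *p)
{ return (unsigned long)*(volatile unsigned long long *)p; }

#define fetch(ptr)						\
	(sizeof(*(ptr)) == 4 ? fetch_u32(ptr) :		\
	 sizeof(*(ptr)) == 8 ? fetch_u64(ptr) :		\
	 fetch_called_with_bad_size())

int main(void)
{
	unsigned int a = 4;
	unsigned long long b = 8;

	printf("%lu %lu\n", fetch(&a), fetch(&b));	/* links fine */
	/* char c[3]; fetch(&c); -- would fail at link time */
	return 0;
}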