Re: locking/atomic: Introduce atomic_try_cmpxchg()

From: Peter Zijlstra
Date: Sat Mar 25 2017 - 03:52:56 EST


On Fri, Mar 24, 2017 at 10:23:29PM +0100, Peter Zijlstra wrote:

> I'll try and redo the patches that landed in tip and see what it does
> for total vmlinux size somewhere tomorrow.

text data bss dec hex filename
10726413 4540256 843776 16110445 f5d36d defconfig-build/vmlinux.pre
10730509 4540256 843776 16114541 f5e36d defconfig-build/vmlinux.post

:-(

---
arch/x86/include/asm/atomic.h | 18 ++++++-------
arch/x86/include/asm/atomic64_64.h | 24 ++++++++++--------
arch/x86/include/asm/cmpxchg.h | 50 ++++++++++++++++++++----------------
include/linux/atomic.h | 52 +++++++++++++++++++++++++-------------
lib/refcount.c | 35 ++++++++++++++-----------
5 files changed, 104 insertions(+), 75 deletions(-)

diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index caa5798..c80d4914 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -186,11 +186,8 @@ static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new)
return cmpxchg(&v->counter, old, new);
}

-#define atomic_try_cmpxchg atomic_try_cmpxchg
-static __always_inline bool atomic_try_cmpxchg(atomic_t *v, int *old, int new)
-{
- return try_cmpxchg(&v->counter, old, new);
-}
+#define atomic_try_cmpxchg(_ptr, _old, _new, _label) \
+ try_cmpxchg(&(_ptr)->counter, _old, _new, _label)

static inline int atomic_xchg(atomic_t *v, int new)
{
@@ -210,8 +207,9 @@ static inline void atomic_##op(int i, atomic_t *v) \
static inline int atomic_fetch_##op(int i, atomic_t *v) \
{ \
int val = atomic_read(v); \
- do { \
- } while (!atomic_try_cmpxchg(v, &val, val c_op i)); \
+ for (;;) \
+ val = atomic_try_cmpxchg(v, val, val c_op i, success); \
+success: \
return val; \
}

@@ -239,10 +237,12 @@ ATOMIC_OPS(xor, ^)
static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u)
{
int c = atomic_read(v);
- do {
+ for (;;) {
if (unlikely(c == u))
break;
- } while (!atomic_try_cmpxchg(v, &c, c + a));
+ c = atomic_try_cmpxchg(v, c, c + a, success);
+ }
+success:
return c;
}

diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 6189a43..489c3e2 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -176,11 +176,8 @@ static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new)
return cmpxchg(&v->counter, old, new);
}

-#define atomic64_try_cmpxchg atomic64_try_cmpxchg
-static __always_inline bool atomic64_try_cmpxchg(atomic64_t *v, long *old, long new)
-{
- return try_cmpxchg(&v->counter, old, new);
-}
+#define atomic64_try_cmpxchg(_ptr, _old, _new, _label) \
+ try_cmpxchg(&(_ptr)->counter, _old, _new, _label)

static inline long atomic64_xchg(atomic64_t *v, long new)
{
@@ -199,10 +196,12 @@ static inline long atomic64_xchg(atomic64_t *v, long new)
static inline bool atomic64_add_unless(atomic64_t *v, long a, long u)
{
long c = atomic64_read(v);
- do {
+ for (;;) {
if (unlikely(c == u))
return false;
- } while (!atomic64_try_cmpxchg(v, &c, c + a));
+ c = atomic64_try_cmpxchg(v, c, c + a, success);
+ }
+success:
return true;
}

@@ -218,11 +217,13 @@ static inline bool atomic64_add_unless(atomic64_t *v, long a, long u)
static inline long atomic64_dec_if_positive(atomic64_t *v)
{
long dec, c = atomic64_read(v);
- do {
+ for (;;) {
dec = c - 1;
if (unlikely(dec < 0))
break;
- } while (!atomic64_try_cmpxchg(v, &c, dec));
+ c = atomic64_try_cmpxchg(v, c, dec, success);
+ }
+success:
return dec;
}

@@ -239,8 +240,9 @@ static inline void atomic64_##op(long i, atomic64_t *v) \
static inline long atomic64_fetch_##op(long i, atomic64_t *v) \
{ \
long val = atomic64_read(v); \
- do { \
- } while (!atomic64_try_cmpxchg(v, &val, val c_op i)); \
+ for (;;) \
+ val = atomic64_try_cmpxchg(v, val, val c_op i, success);\
+success: \
return val; \
}

diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index fb961db..e6b8a8f 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -154,22 +154,24 @@ extern void __add_wrong_size(void)
__cmpxchg_local(ptr, old, new, sizeof(*(ptr)))


-#define __raw_try_cmpxchg(_ptr, _pold, _new, size, lock) \
+#define __raw_try_cmpxchg(_ptr, _old, _new, success_label, size, lock) \
({ \
- bool success; \
- __typeof__(_ptr) _old = (_pold); \
- __typeof__(*(_ptr)) __old = *_old; \
+ __typeof__(*(_ptr)) __old = (_old); \
__typeof__(*(_ptr)) __new = (_new); \
+ __typeof__(*(_ptr)) __ret; \
+ bool __success; \
+ \
switch (size) { \
case __X86_CASE_B: \
{ \
volatile u8 *__ptr = (volatile u8 *)(_ptr); \
asm volatile(lock "cmpxchgb %[new], %[ptr]" \
CC_SET(z) \
- : CC_OUT(z) (success), \
+ : CC_OUT(z) (__success), \
[ptr] "+m" (*__ptr), \
- [old] "+a" (__old) \
- : [new] "q" (__new) \
+ [old] "=a" (__ret) \
+ : [new] "q" (__new), \
+ "2" (__old) \
: "memory"); \
break; \
} \
@@ -178,10 +180,11 @@ extern void __add_wrong_size(void)
volatile u16 *__ptr = (volatile u16 *)(_ptr); \
asm volatile(lock "cmpxchgw %[new], %[ptr]" \
CC_SET(z) \
- : CC_OUT(z) (success), \
+ : CC_OUT(z) (__success), \
[ptr] "+m" (*__ptr), \
- [old] "+a" (__old) \
- : [new] "r" (__new) \
+ [old] "=a" (__ret) \
+ : [new] "r" (__new), \
+ "2" (__old) \
: "memory"); \
break; \
} \
@@ -190,10 +193,11 @@ extern void __add_wrong_size(void)
volatile u32 *__ptr = (volatile u32 *)(_ptr); \
asm volatile(lock "cmpxchgl %[new], %[ptr]" \
CC_SET(z) \
- : CC_OUT(z) (success), \
+ : CC_OUT(z) (__success), \
[ptr] "+m" (*__ptr), \
- [old] "+a" (__old) \
- : [new] "r" (__new) \
+ [old] "=a" (__ret) \
+ : [new] "r" (__new), \
+ "2" (__old) \
: "memory"); \
break; \
} \
@@ -202,25 +206,27 @@ extern void __add_wrong_size(void)
volatile u64 *__ptr = (volatile u64 *)(_ptr); \
asm volatile(lock "cmpxchgq %[new], %[ptr]" \
CC_SET(z) \
- : CC_OUT(z) (success), \
+ : CC_OUT(z) (__success), \
[ptr] "+m" (*__ptr), \
- [old] "+a" (__old) \
- : [new] "r" (__new) \
+ [old] "=a" (__ret) \
+ : [new] "r" (__new), \
+ "2" (__old) \
: "memory"); \
break; \
} \
default: \
__cmpxchg_wrong_size(); \
} \
- *_old = __old; \
- success; \
+ \
+ if (likely(__success)) goto success_label; \
+ __ret; \
})

-#define __try_cmpxchg(ptr, pold, new, size) \
- __raw_try_cmpxchg((ptr), (pold), (new), (size), LOCK_PREFIX)
+#define __try_cmpxchg(ptr, pold, new, success_label, size) \
+ __raw_try_cmpxchg((ptr), (pold), (new), success_label, (size), LOCK_PREFIX)

-#define try_cmpxchg(ptr, pold, new) \
- __try_cmpxchg((ptr), (pold), (new), sizeof(*(ptr)))
+#define try_cmpxchg(ptr, pold, new, success_label) \
+ __try_cmpxchg((ptr), (pold), (new), success_label, sizeof(*(ptr)))

/*
* xadd() adds "inc" to "*ptr" and atomically returns the previous
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index aae5953..13a6eac 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -425,18 +425,26 @@

#ifndef atomic_try_cmpxchg

-#define __atomic_try_cmpxchg(type, _p, _po, _n) \
+#define __atomic_try_cmpxchg(type, _p, _o, _n, _label) \
({ \
- typeof(_po) __po = (_po); \
- typeof(*(_po)) __o = *__po; \
- *__po = atomic_cmpxchg##type((_p), __o, (_n)); \
- (*__po == __o); \
+ typeof(*(_p)) __r; \
+ typeof(*(_p)) __o = (_o); \
+ __r = atomic_cmpxchg##type((_p), __o, (_n)); \
+ if (__r == __o) goto _label; \
+ __r; \
})

-#define atomic_try_cmpxchg(_p, _po, _n) __atomic_try_cmpxchg(, _p, _po, _n)
-#define atomic_try_cmpxchg_relaxed(_p, _po, _n) __atomic_try_cmpxchg(_relaxed, _p, _po, _n)
-#define atomic_try_cmpxchg_acquire(_p, _po, _n) __atomic_try_cmpxchg(_acquire, _p, _po, _n)
-#define atomic_try_cmpxchg_release(_p, _po, _n) __atomic_try_cmpxchg(_release, _p, _po, _n)
+#define atomic_try_cmpxchg(_p, _o, _n, _l) \
+ __atomic_try_cmpxchg(, _p, _o, _n, _l)
+
+#define atomic_try_cmpxchg_relaxed(_p, _o, _n, _l) \
+ __atomic_try_cmpxchg(_relaxed, _p, _o, _n, _l)
+
+#define atomic_try_cmpxchg_acquire(_p, _o, _n, _l) \
+ __atomic_try_cmpxchg(_acquire, _p, _o, _n, _l)
+
+#define atomic_try_cmpxchg_release(_p, _o, _n, _l) \
+ __atomic_try_cmpxchg(_release, _p, _o, _n, _l)

#else /* atomic_try_cmpxchg */
#define atomic_try_cmpxchg_relaxed atomic_try_cmpxchg
@@ -1019,18 +1027,26 @@ static inline int atomic_dec_if_positive(atomic_t *v)

#ifndef atomic64_try_cmpxchg

-#define __atomic64_try_cmpxchg(type, _p, _po, _n) \
+#define __atomic64_try_cmpxchg(type, _p, _o, _n, _label) \
({ \
- typeof(_po) __po = (_po); \
- typeof(*(_po)) __o = *__po; \
- *__po = atomic64_cmpxchg##type((_p), __o, (_n)); \
- (*__po == __o); \
+ typeof(*(_p)) __r; \
+ typeof(*(_p)) __o = (_o); \
+ __r = atomic64_cmpxchg##type((_p), __o, (_n)); \
+ if (__r == __o) goto _label; \
+ __r; \
})

-#define atomic64_try_cmpxchg(_p, _po, _n) __atomic64_try_cmpxchg(, _p, _po, _n)
-#define atomic64_try_cmpxchg_relaxed(_p, _po, _n) __atomic64_try_cmpxchg(_relaxed, _p, _po, _n)
-#define atomic64_try_cmpxchg_acquire(_p, _po, _n) __atomic64_try_cmpxchg(_acquire, _p, _po, _n)
-#define atomic64_try_cmpxchg_release(_p, _po, _n) __atomic64_try_cmpxchg(_release, _p, _po, _n)
+#define atomic64_try_cmpxchg(_p, _o, _n, _l) \
+ __atomic64_try_cmpxchg(, _p, _o, _n, _l)
+
+#define atomic64_try_cmpxchg_relaxed(_p, _o, _n, _l) \
+ __atomic64_try_cmpxchg(_relaxed, _p, _o, _n, _l)
+
+#define atomic64_try_cmpxchg_acquire(_p, _o, _n, _l) \
+ __atomic64_try_cmpxchg(_acquire, _p, _o, _n, _l)
+
+#define atomic64_try_cmpxchg_release(_p, _o, _n, _l) \
+ __atomic64_try_cmpxchg(_release, _p, _o, _n, _l)

#else /* atomic64_try_cmpxchg */
#define atomic64_try_cmpxchg_relaxed atomic64_try_cmpxchg
diff --git a/lib/refcount.c b/lib/refcount.c
index f42124c..18b8926 100644
--- a/lib/refcount.c
+++ b/lib/refcount.c
@@ -59,7 +59,7 @@ bool refcount_add_not_zero(unsigned int i, refcount_t *r)
{
unsigned int new, val = atomic_read(&r->refs);

- do {
+ for (;;) {
if (!val)
return false;

@@ -70,8 +70,9 @@ bool refcount_add_not_zero(unsigned int i, refcount_t *r)
if (new < val)
new = UINT_MAX;

- } while (!atomic_try_cmpxchg_relaxed(&r->refs, &val, new));
-
+ val = atomic_try_cmpxchg_relaxed(&r->refs, val, new, success);
+ }
+success:
WARN_ONCE(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n");

return true;
@@ -116,7 +117,7 @@ bool refcount_inc_not_zero(refcount_t *r)
{
unsigned int new, val = atomic_read(&r->refs);

- do {
+ for (;;) {
new = val + 1;

if (!val)
@@ -125,8 +126,9 @@ bool refcount_inc_not_zero(refcount_t *r)
if (unlikely(!new))
return true;

- } while (!atomic_try_cmpxchg_relaxed(&r->refs, &val, new));
-
+ val = atomic_try_cmpxchg_relaxed(&r->refs, val, new, success);
+ }
+success:
WARN_ONCE(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n");

return true;
@@ -175,7 +177,7 @@ bool refcount_sub_and_test(unsigned int i, refcount_t *r)
{
unsigned int new, val = atomic_read(&r->refs);

- do {
+ for (;;) {
if (unlikely(val == UINT_MAX))
return false;

@@ -185,8 +187,9 @@ bool refcount_sub_and_test(unsigned int i, refcount_t *r)
return false;
}

- } while (!atomic_try_cmpxchg_release(&r->refs, &val, new));
-
+ val = atomic_try_cmpxchg_release(&r->refs, val, new, success);
+ }
+success:
return !new;
}
EXPORT_SYMBOL_GPL(refcount_sub_and_test);
@@ -244,9 +247,10 @@ EXPORT_SYMBOL_GPL(refcount_dec);
*/
bool refcount_dec_if_one(refcount_t *r)
{
- int val = 1;
-
- return atomic_try_cmpxchg_release(&r->refs, &val, 0);
+ atomic_try_cmpxchg_release(&r->refs, 1, 0, success);
+ return false;
+success:
+ return true;
}
EXPORT_SYMBOL_GPL(refcount_dec_if_one);

@@ -265,7 +269,7 @@ bool refcount_dec_not_one(refcount_t *r)
{
unsigned int new, val = atomic_read(&r->refs);

- do {
+ for (;;) {
if (unlikely(val == UINT_MAX))
return true;

@@ -278,8 +282,9 @@ bool refcount_dec_not_one(refcount_t *r)
return true;
}

- } while (!atomic_try_cmpxchg_release(&r->refs, &val, new));
-
+ val = atomic_try_cmpxchg_release(&r->refs, val, new, success);
+ }
+success:
return true;
}
EXPORT_SYMBOL_GPL(refcount_dec_not_one);