[RFC][PATCH 4/5] atomic,x86: Implement atomic_dec_and_test_ofl()

From: Peter Zijlstra
Date: Wed Dec 08 2021 - 13:40:58 EST


Provide a better implementation of atomic_{dec,dec_and_test}_ofl() by
making use of the condition codes set by the atomic op itself.
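
For reference, the intended semantics are roughly the sketch below,
written in terms of the generic atomics (illustrative only, not
necessarily the series' actual fallback code): decrement, branch to the
overflow label when the result goes negative, and evaluate to true when
the result is exactly zero.

/* Illustrative sketch only; the name and exact form are hypothetical. */
#define atomic_dec_and_test_ofl_sketch(_v, _label)               \
({                                                               \
        int __new = atomic_fetch_dec(_v) - 1;                    \
        if (unlikely(__new < 0))                                 \
                goto _label;    /* underflow: take the label */  \
        __new == 0;             /* true when we hit zero */      \
})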

This further improves the fast path code from:

  a980:  b8 ff ff ff ff          mov    $0xffffffff,%eax
  a985:  f0 0f c1 07             lock xadd %eax,(%rdi)
  a989:  83 e8 01                sub    $0x1,%eax
  a98c:  78 20                   js     a9ae <ring_buffer_put+0x2e>
  a98e:  74 01                   je     a991 <ring_buffer_put+0x11>
  a990:  c3                      ret

to:

  ab81:  48 89 fb                mov    %rdi,%rbx
  ab84:  f0 ff 0f                lock decl (%rdi)
  ab87:  7c 04                   jl     ab8d <ring_buffer_put+0xd>
  ab89:  74 10                   je     ab9b <ring_buffer_put+0x1b>
  ab8b:  c3                      ret
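
For illustration, a caller shaped like ring_buffer_put() above could use
the new primitive as in the sketch below, assuming the
atomic_dec_and_test_ofl() wrapper from earlier in the series; the names
and types here are hypothetical and only the branch structure is meant
to match the listings. The jl/js branch is the underflow path taken via
the label argument, and the je branch is the free-on-zero path.

/* Hypothetical caller sketch; my_buffer and my_buffer_free() are made up. */
static void my_buffer_put(struct my_buffer *rb)
{
        if (atomic_dec_and_test_ofl(&rb->refcount, underflow))
                my_buffer_free(rb);
        return;

underflow:
        WARN_ONCE(1, "refcount underflow");
}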

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
arch/x86/include/asm/atomic.h | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)
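
A note on the label operand numbering in the asm goto below: with no
output operands, labels are numbered after the inputs, so with the
single [var] input the first label is operand 1. In
arch_atomic_dec_and_test_ofl() the labels are __zero and _label in that
order, hence "jl %l2" targets _label while "je %l[__zero]" refers to a
label by name. A minimal standalone sketch of the pattern (illustrative
only; plain GCC asm goto rather than the kernel's asm_volatile_goto()):

static inline void dec_branch_if_nonpositive(int *p)
{
        asm goto("decl %[var]\n\t"
                 "jle %l1"      /* operand 0 is [var], operand 1 is the label */
                 : /* no outputs */
                 : [var] "m" (*p)
                 : "memory", "cc"
                 : nonpositive);
        return;

nonpositive:
        /* reached when the decremented value is <= 0 */
        return;
}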

--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -263,6 +263,29 @@ static __always_inline int arch_atomic_f
}
#define arch_atomic_fetch_xor arch_atomic_fetch_xor

+#define arch_atomic_dec_ofl(_v, _label)                                \
+       asm_volatile_goto(LOCK_PREFIX "decl %[var]\n\t"                 \
+                         "jle %l1"                                     \
+                         : : [var] "m" ((_v)->counter)                 \
+                         : "memory"                                    \
+                         : _label)
+
+#define arch_atomic_dec_and_test_ofl(_v, _label)                       \
+({                                                                     \
+       __label__ __zero;                                               \
+       __label__ __out;                                                \
+       bool __ret = false;                                             \
+       asm_volatile_goto(LOCK_PREFIX "decl %[var]\n\t"                 \
+                         "jl %l2\n\t"                                  \
+                         "je %l[__zero]"                               \
+                         : : [var] "m" ((_v)->counter)                 \
+                         : "memory"                                    \
+                         : __zero, _label);                            \
+       goto __out;                                                     \
+__zero: __ret = true;                                                  \
+__out:  __ret;                                                         \
+})
+
#ifdef CONFIG_X86_32
# include <asm/atomic64_32.h>
#else