Re: [PATCH 5/7] x86/percpu: Clean up percpu_add_return_op()

From: Nick Desaulniers
Date: Mon May 18 2020 - 18:46:53 EST


On Sun, May 17, 2020 at 8:29 AM Brian Gerst <brgerst@xxxxxxxxx> wrote:
>
> The core percpu macros already have a switch on the data size, so the switch
> in the x86 code is redundant and produces more dead code.
>
> Also use appropriate types for the width of the instructions. This avoids
> errors when compiling with Clang.
>
> Signed-off-by: Brian Gerst <brgerst@xxxxxxxxx>
> ---
> arch/x86/include/asm/percpu.h | 51 +++++++++++------------------------
> 1 file changed, 16 insertions(+), 35 deletions(-)
>
> diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
> index 21c5013a681a..ac8c391a190e 100644
> --- a/arch/x86/include/asm/percpu.h
> +++ b/arch/x86/include/asm/percpu.h
> @@ -199,34 +199,15 @@ do { \
> /*
> * Add return operation
> */
> -#define percpu_add_return_op(qual, var, val) \
> +#define percpu_add_return_op(size, qual, _var, _val) \
> ({ \
> - typeof(var) paro_ret__ = val; \
> - switch (sizeof(var)) { \
> - case 1: \
> - asm qual ("xaddb %0, "__percpu_arg(1) \
> - : "+q" (paro_ret__), "+m" (var) \
> - : : "memory"); \
> - break; \
> - case 2: \
> - asm qual ("xaddw %0, "__percpu_arg(1) \
> - : "+r" (paro_ret__), "+m" (var) \
> - : : "memory"); \
> - break; \
> - case 4: \
> - asm qual ("xaddl %0, "__percpu_arg(1) \
> - : "+r" (paro_ret__), "+m" (var) \
> - : : "memory"); \
> - break; \
> - case 8: \
> - asm qual ("xaddq %0, "__percpu_arg(1) \
> - : "+re" (paro_ret__), "+m" (var) \

^ before we use the "+re" constraint for 8B input.

> - : : "memory"); \
> - break; \
> - default: __bad_percpu_size(); \

Comment on the series as a whole. After applying the series, the
final reference to __bad_percpu_size and switch statement in
arch/x86/include/asm/percpu.h in the definition of the
percpu_stable_op() macro. If you clean that up, too, then the rest of
this file feels more consistent with your series, even if it's not a
blocker for Clang i386 support. Then you can get rid of
__bad_percpu_size, too!


> - } \
> - paro_ret__ += val; \
> - paro_ret__; \
> + __pcpu_type_##size paro_tmp__ = __pcpu_cast_##size(_val); \
> + asm qual (__pcpu_op2_##size("xadd", "%[tmp]", \
> + __percpu_arg([var])) \
> + : [tmp] __pcpu_reg_##size("+", paro_tmp__), \

^ after, for `size == 8`, we use "+r". [0] says for "e":

32-bit signed integer constant, or a symbolic reference known to fit
that range (for immediate operands in sign-extending x86-64
instructions).

I'm guessing we're restricting the input to not allow for 64b signed
integer constants? Looking at the documentation for `xadd` (ie.
"exchange and add") [1], it looks like immediates are not allowed as
operands, only registers or memory addresses. So it seems that "e"
was never necessary. It might be helpful to note that in the commit
message, should you end up sending a v2 of the series. Maybe some
folks with more x86 inline asm experience can triple check/verify?

Assuming we're good to go, LGTM.
Reviewed-by: Nick Desaulniers <ndesaulniers@xxxxxxxxxx>

[0] https://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html#Machine-Constraints
[1] https://www.felixcloutier.com/x86/xadd


> + [var] "+m" (_var) \
> + : : "memory"); \
> + (typeof(_var))(unsigned long) (paro_tmp__ + _val); \
> })
>
> /*
> @@ -377,16 +358,16 @@ do { \
> #define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(volatile, pcp, nval)
> #define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(volatile, pcp, nval)
>
> -#define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(, pcp, val)
> -#define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(, pcp, val)
> -#define raw_cpu_add_return_4(pcp, val) percpu_add_return_op(, pcp, val)
> +#define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(1, , pcp, val)
> +#define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(2, , pcp, val)
> +#define raw_cpu_add_return_4(pcp, val) percpu_add_return_op(4, , pcp, val)
> #define raw_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, nval)
> #define raw_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, nval)
> #define raw_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, nval)
>
> -#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(volatile, pcp, val)
> -#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(volatile, pcp, val)
> -#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(volatile, pcp, val)
> +#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(1, volatile, pcp, val)
> +#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(2, volatile, pcp, val)
> +#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(4, volatile, pcp, val)
> #define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(volatile, pcp, oval, nval)
> #define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(volatile, pcp, oval, nval)
> #define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(volatile, pcp, oval, nval)
> @@ -418,7 +399,7 @@ do { \
> #define raw_cpu_add_8(pcp, val) percpu_add_op(8, , (pcp), val)
> #define raw_cpu_and_8(pcp, val) percpu_to_op(8, , "and", (pcp), val)
> #define raw_cpu_or_8(pcp, val) percpu_to_op(8, , "or", (pcp), val)
> -#define raw_cpu_add_return_8(pcp, val) percpu_add_return_op(, pcp, val)
> +#define raw_cpu_add_return_8(pcp, val) percpu_add_return_op(8, , pcp, val)
> #define raw_cpu_xchg_8(pcp, nval) raw_percpu_xchg_op(pcp, nval)
> #define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, nval)
>
> @@ -427,7 +408,7 @@ do { \
> #define this_cpu_add_8(pcp, val) percpu_add_op(8, volatile, (pcp), val)
> #define this_cpu_and_8(pcp, val) percpu_to_op(8, volatile, "and", (pcp), val)
> #define this_cpu_or_8(pcp, val) percpu_to_op(8, volatile, "or", (pcp), val)
> -#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(volatile, pcp, val)
> +#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(8, volatile, pcp, val)
> #define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(volatile, pcp, nval)
> #define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(volatile, pcp, oval, nval)
>
> --
> 2.25.4
>


--
Thanks,
~Nick Desaulniers