[tip: locking/urgent] static_call: Properly initialise DEFINE_STATIC_CALL_RET0()

From: tip-bot2 for Christophe Leroy
Date: Tue Apr 05 2022 - 06:26:34 EST


The following commit has been merged into the locking/urgent branch of tip:

Commit-ID: 5517d500829c683a358a8de04ecb2e28af629ae5
Gitweb: https://git.kernel.org/tip/5517d500829c683a358a8de04ecb2e28af629ae5
Author: Christophe Leroy <christophe.leroy@xxxxxxxxxx>
AuthorDate: Mon, 14 Mar 2022 11:27:35 +01:00
Committer: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
CommitterDate: Tue, 05 Apr 2022 09:59:38 +02:00

static_call: Properly initialise DEFINE_STATIC_CALL_RET0()

When a static call is updated with __static_call_return0() as target,
arch_static_call_transform() set it to use an optimised set of
instructions which are meant to lay in the same cacheline.

But when initialising a static call with DEFINE_STATIC_CALL_RET0(),
we get a branch to the real __static_call_return0() function instead
of getting the optimised setup:

c00d8120 <__SCT__perf_snapshot_branch_stack>:
c00d8120: 4b ff ff f4 b c00d8114 <__static_call_return0>
c00d8124: 3d 80 c0 0e lis r12,-16370
c00d8128: 81 8c 81 3c lwz r12,-32452(r12)
c00d812c: 7d 89 03 a6 mtctr r12
c00d8130: 4e 80 04 20 bctr
c00d8134: 38 60 00 00 li r3,0
c00d8138: 4e 80 00 20 blr
c00d813c: 00 00 00 00 .long 0x0

Add ARCH_DEFINE_STATIC_CALL_RET0_TRAMP() defined by each architecture
to setup the optimised configuration, and rework
DEFINE_STATIC_CALL_RET0() to call it:

c00d8120 <__SCT__perf_snapshot_branch_stack>:
c00d8120: 48 00 00 14 b c00d8134 <__SCT__perf_snapshot_branch_stack+0x14>
c00d8124: 3d 80 c0 0e lis r12,-16370
c00d8128: 81 8c 81 3c lwz r12,-32452(r12)
c00d812c: 7d 89 03 a6 mtctr r12
c00d8130: 4e 80 04 20 bctr
c00d8134: 38 60 00 00 li r3,0
c00d8138: 4e 80 00 20 blr
c00d813c: 00 00 00 00 .long 0x0

Signed-off-by: Christophe Leroy <christophe.leroy@xxxxxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Acked-by: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
Link: https://lore.kernel.org/r/1e0a61a88f52a460f62a58ffc2a5f847d1f7d9d8.1647253456.git.christophe.leroy@xxxxxxxxxx
---
arch/powerpc/include/asm/static_call.h | 1 +
arch/x86/include/asm/static_call.h | 2 ++
include/linux/static_call.h | 20 +++++++++++++++++---
3 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/static_call.h b/arch/powerpc/include/asm/static_call.h
index 0a0bc79..de1018c 100644
--- a/arch/powerpc/include/asm/static_call.h
+++ b/arch/powerpc/include/asm/static_call.h
@@ -24,5 +24,6 @@

#define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) __PPC_SCT(name, "b " #func)
#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) __PPC_SCT(name, "blr")
+#define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) __PPC_SCT(name, "b .+20")

#endif /* _ASM_POWERPC_STATIC_CALL_H */
diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h
index ed4f8bb..2455d72 100644
--- a/arch/x86/include/asm/static_call.h
+++ b/arch/x86/include/asm/static_call.h
@@ -38,6 +38,8 @@
#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \
__ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; int3; nop; nop; nop")

+#define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) \
+ ARCH_DEFINE_STATIC_CALL_TRAMP(name, __static_call_return0)

#define ARCH_ADD_TRAMP_KEY(name) \
asm(".pushsection .static_call_tramp_key, \"a\" \n" \
diff --git a/include/linux/static_call.h b/include/linux/static_call.h
index fcc5b48..3c50b0f 100644
--- a/include/linux/static_call.h
+++ b/include/linux/static_call.h
@@ -196,6 +196,14 @@ extern long __static_call_return0(void);
}; \
ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)

+#define DEFINE_STATIC_CALL_RET0(name, _func) \
+ DECLARE_STATIC_CALL(name, _func); \
+ struct static_call_key STATIC_CALL_KEY(name) = { \
+ .func = __static_call_return0, \
+ .type = 1, \
+ }; \
+ ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name)
+
#define static_call_cond(name) (void)__static_call(name)

#define EXPORT_STATIC_CALL(name) \
@@ -231,6 +239,12 @@ static inline int static_call_init(void) { return 0; }
}; \
ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)

+#define DEFINE_STATIC_CALL_RET0(name, _func) \
+ DECLARE_STATIC_CALL(name, _func); \
+ struct static_call_key STATIC_CALL_KEY(name) = { \
+ .func = __static_call_return0, \
+ }; \
+ ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name)

#define static_call_cond(name) (void)__static_call(name)

@@ -284,6 +298,9 @@ static inline long __static_call_return0(void)
.func = NULL, \
}

+#define DEFINE_STATIC_CALL_RET0(name, _func) \
+ __DEFINE_STATIC_CALL(name, _func, __static_call_return0)
+
static inline void __static_call_nop(void) { }

/*
@@ -327,7 +344,4 @@ static inline int static_call_text_reserved(void *start, void *end)
#define DEFINE_STATIC_CALL(name, _func) \
__DEFINE_STATIC_CALL(name, _func, _func)

-#define DEFINE_STATIC_CALL_RET0(name, _func) \
- __DEFINE_STATIC_CALL(name, _func, __static_call_return0)
-
#endif /* _LINUX_STATIC_CALL_H */