Re: [PATCH 13/15] x86/static_call: Add inline static call implementation for x86-64

From: Nadav Amit
Date: Fri Jun 07 2019 - 01:54:55 EST


> On Jun 5, 2019, at 6:08 AM, Peter Zijlstra <peterz@xxxxxxxxxxxxx> wrote:
>
> From: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
>
> Add the inline static call implementation for x86-64. For each key, a
> temporary trampoline is created, named __static_call_tramp_<key>. The
> trampoline has an indirect jump to the destination function.
>
> Objtool uses the trampoline naming convention to detect all the call
> sites. It then annotates those call sites in the .static_call_sites
> section.
>
> During boot (and module init), the call sites are patched to call
> directly into the destination function. The temporary trampoline is
> then no longer used.
>
> Cc: x86@xxxxxxxxxx
> Cc: Steven Rostedt <rostedt@xxxxxxxxxxx>
> Cc: Julia Cartwright <julia@xxxxxx>
> Cc: Ingo Molnar <mingo@xxxxxxxxxx>
> Cc: Ard Biesheuvel <ard.biesheuvel@xxxxxxxxxx>
> Cc: Jason Baron <jbaron@xxxxxxxxxx>
> Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
> Cc: Jiri Kosina <jkosina@xxxxxxx>
> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> Cc: Masami Hiramatsu <mhiramat@xxxxxxxxxx>
> Cc: Borislav Petkov <bp@xxxxxxxxx>
> Cc: David Laight <David.Laight@xxxxxxxxxx>
> Cc: Jessica Yu <jeyu@xxxxxxxxxx>
> Cc: Andy Lutomirski <luto@xxxxxxxxxx>
> Cc: "H. Peter Anvin" <hpa@xxxxxxxxx>
> Signed-off-by: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
> Link: https://lkml.kernel.org/r/62188c62f6dda49ca2e20629ee8e5a62a6c0b500.1543200841.git.jpoimboe@redhat.com
> ---
> arch/x86/Kconfig | 3
> arch/x86/include/asm/static_call.h | 28 ++++-
> arch/x86/kernel/asm-offsets.c | 6 +
> arch/x86/kernel/static_call.c | 12 +-
> include/linux/static_call.h | 2
> tools/objtool/Makefile | 3
> tools/objtool/check.c | 125 +++++++++++++++++++++++-
> tools/objtool/check.h | 2
> tools/objtool/elf.h | 1
> tools/objtool/include/linux/static_call_types.h | 19 +++
> tools/objtool/sync-check.sh | 1
> 11 files changed, 193 insertions(+), 9 deletions(-)
> create mode 100644 tools/objtool/include/linux/static_call_types.h
>
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -199,6 +199,7 @@ config X86
> select HAVE_STACKPROTECTOR if CC_HAS_SANE_STACKPROTECTOR
> select HAVE_STACK_VALIDATION if X86_64
> select HAVE_STATIC_CALL
> + select HAVE_STATIC_CALL_INLINE if HAVE_STACK_VALIDATION
> select HAVE_RSEQ
> select HAVE_SYSCALL_TRACEPOINTS
> select HAVE_UNSTABLE_SCHED_CLOCK
> @@ -213,6 +214,7 @@ config X86
> select RTC_MC146818_LIB
> select SPARSE_IRQ
> select SRCU
> + select STACK_VALIDATION if HAVE_STACK_VALIDATION && (HAVE_STATIC_CALL_INLINE || RETPOLINE)
> select SYSCTL_EXCEPTION_TRACE
> select THREAD_INFO_IN_TASK
> select USER_STACKTRACE_SUPPORT
> @@ -439,7 +441,6 @@ config GOLDFISH
> config RETPOLINE
> bool "Avoid speculative indirect branches in kernel"
> default y
> - select STACK_VALIDATION if HAVE_STACK_VALIDATION
> help
> Compile kernel with the retpoline compiler options to guard against
> kernel-to-user data leaks by avoiding speculative indirect
> --- a/arch/x86/include/asm/static_call.h
> +++ b/arch/x86/include/asm/static_call.h
> @@ -2,6 +2,20 @@
> #ifndef _ASM_STATIC_CALL_H
> #define _ASM_STATIC_CALL_H
>
> +#include <asm/asm-offsets.h>
> +
> +#ifdef CONFIG_HAVE_STATIC_CALL_INLINE
> +
> +/*
> + * This trampoline is only used during boot / module init, so it's safe to use
> + * the indirect branch without a retpoline.
> + */
> +#define __ARCH_STATIC_CALL_TRAMP_JMP(key, func) \
> + ANNOTATE_RETPOLINE_SAFE \
> + "jmpq *" __stringify(key) "+" __stringify(SC_KEY_func) "(%rip) \n"
> +
> +#else /* !CONFIG_HAVE_STATIC_CALL_INLINE */
> +
> /*
> * Manually construct a 5-byte direct JMP to prevent the assembler from
> * optimizing it into a 2-byte JMP.
> @@ -12,9 +26,19 @@
> ".long " #func " - " __ARCH_STATIC_CALL_JMP_LABEL(key) "\n" \
> __ARCH_STATIC_CALL_JMP_LABEL(key) ":"
>
> +#endif /* !CONFIG_HAVE_STATIC_CALL_INLINE */
> +
> /*
> - * This is a permanent trampoline which does a direct jump to the function.
> - * The direct jump get patched by static_call_update().
> + * For CONFIG_HAVE_STATIC_CALL_INLINE, this is a temporary trampoline which
> + * uses the current value of the key->func pointer to do an indirect jump to
> + * the function. This trampoline is only used during boot, before the call
> + * sites get patched by static_call_update(). The name of this trampoline has
> + * a magical aspect: objtool uses it to find static call sites so it can create
> + * the .static_call_sites section.
> + *
> + * For CONFIG_HAVE_STATIC_CALL, this is a permanent trampoline which
> + * does a direct jump to the function. The direct jump gets patched by
> + * static_call_update().
> */
> #define ARCH_DEFINE_STATIC_CALL_TRAMP(key, func) \
> asm(".pushsection .text, \"ax\" \n" \
> --- a/arch/x86/kernel/asm-offsets.c
> +++ b/arch/x86/kernel/asm-offsets.c
> @@ -12,6 +12,7 @@
> #include <linux/hardirq.h>
> #include <linux/suspend.h>
> #include <linux/kbuild.h>
> +#include <linux/static_call.h>
> #include <asm/processor.h>
> #include <asm/thread_info.h>
> #include <asm/sigframe.h>
> @@ -104,4 +105,9 @@ static void __used common(void)
> OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
> OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
> OFFSET(TSS_sp2, tss_struct, x86_tss.sp2);
> +
> +#ifdef CONFIG_HAVE_STATIC_CALL_INLINE
> + BLANK();
> + OFFSET(SC_KEY_func, static_call_key, func);
> +#endif
> }
> --- a/arch/x86/kernel/static_call.c
> +++ b/arch/x86/kernel/static_call.c
> @@ -10,16 +10,22 @@
> void arch_static_call_transform(void *site, void *tramp, void *func)
> {
> unsigned char opcodes[CALL_INSN_SIZE];
> - unsigned char insn_opcode;
> + unsigned char insn_opcode, expected;
> unsigned long insn;
> s32 dest_relative;
>
> mutex_lock(&text_mutex);
>
> - insn = (unsigned long)tramp;
> + if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE)) {
> + insn = (unsigned long)site;
> + expected = 0xE8; /* CALL */

Should this use the RELATIVECALL_OPCODE macro instead of the hard-coded 0xE8?

> + } else {
> + insn = (unsigned long)tramp;
> + expected = 0xE9; /* JMP */

Should this use the RELATIVEJUMP_OPCODE macro instead of the hard-coded 0xE9?

(I did not review the objtool parts.)