[PATCH v3 3/6] x86/static_call: Add out-of-line static call implementation

From: Josh Poimboeuf
Date: Wed Jan 09 2019 - 18:00:05 EST


Add the x86 out-of-line static call implementation. For each key, a
permanent trampoline is created which is the destination for all static
calls with the given key. The trampoline has a direct jump which gets
patched by static_call_update() when the destination function changes.

This relies on the fact that call destinations can be atomically updated
as long as they don't cross cache line boundaries.
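As an illustration (the key and function names here are hypothetical, and
the DEFINE_STATIC_CALL()/static_call()/static_call_update() wrappers come
from the generic infrastructure added earlier in this series), a minimal
usage sketch looks like:

	static int func_a(int arg)
	{
		return arg + 1;
	}

	static int func_b(int arg)
	{
		return arg + 2;
	}

	/* emits the trampoline via ARCH_DEFINE_STATIC_CALL_TRAMP() */
	DEFINE_STATIC_CALL(my_key, func_a);

	static int caller(int arg)
	{
		/* direct call to the my_key trampoline, which jmps to func_a */
		return static_call(my_key, arg);
	}

	static void retarget(void)
	{
		/* patches the trampoline's jmp to point at func_b instead */
		static_call_update(my_key, func_b);
	}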

Signed-off-by: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
---
 arch/x86/Kconfig                   |  1 +
 arch/x86/include/asm/static_call.h | 27 +++++++++++++++++++++
 arch/x86/kernel/Makefile           |  1 +
 arch/x86/kernel/static_call.c      | 38 ++++++++++++++++++++++++++++++
 arch/x86/kernel/vmlinux.lds.S      |  1 +
 include/asm-generic/vmlinux.lds.h  | 15 ++++++++++++
 6 files changed, 83 insertions(+)
create mode 100644 arch/x86/include/asm/static_call.h
create mode 100644 arch/x86/kernel/static_call.c

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 6185d4f33296..421097322f1b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -190,6 +190,7 @@ config X86
 	select HAVE_FUNCTION_ARG_ACCESS_API
 	select HAVE_STACKPROTECTOR		if CC_HAS_SANE_STACKPROTECTOR
 	select HAVE_STACK_VALIDATION		if X86_64
+	select HAVE_STATIC_CALL
 	select HAVE_RSEQ
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_UNSTABLE_SCHED_CLOCK
diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h
new file mode 100644
index 000000000000..fab5facade03
--- /dev/null
+++ b/arch/x86/include/asm/static_call.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_STATIC_CALL_H
+#define _ASM_STATIC_CALL_H
+
+#include <asm/asm-offsets.h>
+
+/*
+ * This trampoline is used for out-of-line static calls. It has a direct jump
+ * which gets patched by static_call_update().
+ *
+ * Trampolines are placed in the .static_call.text section to prevent two-byte
+ * tail calls to the trampoline and two-byte jumps from the trampoline.
+ *
+ * IMPORTANT: The JMP instruction's 4-byte destination must never cross
+ * cacheline boundaries! The patching code relies on that to ensure
+ * atomic updates.
+ */
+#define ARCH_DEFINE_STATIC_CALL_TRAMP(key, func)			\
+	asm(".pushsection .static_call.text, \"ax\"		\n"	\
+	    ".align 8						\n"	\
+	    ".globl " STATIC_CALL_TRAMP_STR(key) "		\n"	\
+	    ".type " STATIC_CALL_TRAMP_STR(key) ", @function	\n"	\
+	    STATIC_CALL_TRAMP_STR(key) ":			\n"	\
+	    "jmp " #func "					\n"	\
+	    ".popsection					\n")
+
+#endif /* _ASM_STATIC_CALL_H */
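An aside on the IMPORTANT comment above: the ".align 8" together with the
5-byte JMP encoding is what guarantees it. The rel32 destination occupies
bytes 1-4 of an 8-byte-aligned block, so it can never straddle a 64-byte
cache line; the worst case is a trampoline at offset 56 within a line,
which puts the rel32 at bytes 57-60. A userspace sketch of the invariant,
with an invented helper name:

	/* check_rel32() is illustrative only, not part of this patch */
	#include <assert.h>
	#include <stdint.h>

	static void check_rel32(uintptr_t tramp)
	{
		assert(tramp % 8 == 0);	/* guaranteed by ".align 8" */
		/* the rel32 lives at bytes tramp+1 .. tramp+4 */
		assert((tramp + 1) / 64 == (tramp + 4) / 64);
	}

	int main(void)
	{
		uintptr_t tramp;

		for (tramp = 0; tramp < 4096; tramp += 8)
			check_rel32(tramp);
		return 0;
	}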
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 00b7e27bc2b7..f1329a79fd3b 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -63,6 +63,7 @@ obj-y += tsc.o tsc_msr.o io_delay.o rtc.o
 obj-y			+= pci-iommu_table.o
 obj-y			+= resource.o
 obj-y			+= irqflags.o
+obj-y			+= static_call.o
 
 obj-y			+= process.o
 obj-y			+= fpu/
diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c
new file mode 100644
index 000000000000..e6ef53fbce20
--- /dev/null
+++ b/arch/x86/kernel/static_call.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/static_call.h>
+#include <linux/memory.h>
+#include <linux/bug.h>
+#include <asm/text-patching.h>
+#include <asm/nospec-branch.h>
+
+#define CALL_INSN_SIZE 5
+
+void __ref arch_static_call_transform(void *site, void *tramp, void *func)
+{
+	s32 dest_relative;
+	unsigned char opcode;
+	void *(*poker)(void *, const void *, size_t);
+	void *insn = tramp;
+
+	mutex_lock(&text_mutex);
+
+	/*
+	 * For x86-64, a 32-bit cross-modifying write to a call destination is
+	 * safe as long as it's within a cache line.
+	 */
+	opcode = *(unsigned char *)insn;
+	if (opcode != 0xe8 && opcode != 0xe9) {
+		WARN_ONCE(1, "unexpected static call insn opcode 0x%x at %pS",
+			  opcode, insn);
+		goto done;
+	}
+
+	dest_relative = (long)(func) - (long)(insn + CALL_INSN_SIZE);
+
+	poker = early_boot_irqs_disabled ? text_poke_early : text_poke;
+	poker(insn + 1, &dest_relative, sizeof(dest_relative));
+
+done:
+	mutex_unlock(&text_mutex);
+}
+EXPORT_SYMBOL_GPL(arch_static_call_transform);
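For reference, dest_relative above is the standard x86 rel32 encoding:
the target address minus the address of the instruction following the
JMP/CALL. A worked example with invented addresses:

	void *insn = (void *)0xffffffff81000040;	/* trampoline JMP */
	void *func = (void *)0xffffffff81234560;	/* new destination */
	s32 rel = (long)func - (long)(insn + CALL_INSN_SIZE);
	/*
	 * rel = 0x81234560 - 0x81000045 = 0x23451b; at runtime the CPU
	 * adds rel to the next-instruction address:
	 * 0xffffffff81000045 + 0x23451b = 0xffffffff81234560.
	 */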
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 0d618ee634ac..17470e88ac40 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -128,6 +128,7 @@ SECTIONS
 		CPUIDLE_TEXT
 		LOCK_TEXT
 		KPROBES_TEXT
+		STATIC_CALL_TEXT
 		ALIGN_ENTRY_TEXT_BEGIN
 		ENTRY_TEXT
 		IRQENTRY_TEXT
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 3d7a6a9c2370..f2981a0161f2 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -320,6 +320,7 @@
 	__start_ro_after_init = .;					\
 	*(.data..ro_after_init)						\
 	JUMP_TABLE_DATA							\
+	STATIC_CALL_SITES						\
 	__end_ro_after_init = .;
 #endif

@@ -530,6 +531,10 @@
 		*(.kprobes.text)					\
 		__kprobes_text_end = .;
 
+#define STATIC_CALL_TEXT						\
+		ALIGN_FUNCTION();					\
+		*(.static_call.text)
+
 #define ENTRY_TEXT							\
 		ALIGN_FUNCTION();					\
 		__entry_text_start = .;
@@ -725,6 +730,16 @@
 #define BUG_TABLE
 #endif
 
+#ifdef CONFIG_HAVE_STATIC_CALL_INLINE
+#define STATIC_CALL_SITES						\
+	. = ALIGN(8);							\
+	__start_static_call_sites = .;					\
+	KEEP(*(.static_call_sites))					\
+	__stop_static_call_sites = .;
+#else
+#define STATIC_CALL_SITES
+#endif
+
 #ifdef CONFIG_UNWINDER_ORC
 #define ORC_UNWIND_TABLE						\
 	. = ALIGN(4);							\
--
2.17.2