[RFC 03/16] kgr: initial code

From: Jiri Slaby
Date: Wed Apr 30 2014 - 10:32:13 EST


From: Jiri Kosina <jkosina@xxxxxxx>

Provide initial implementation. We are now able to do ftrace-based
runtime patching of the kernel code.

In addition to that, we will provide a kgr_patcher module in the next
patch to test the functionality.

Limitations/TODOs:

- rmmod of the module that provides the patch is not possible (it'd be nice
if that'd cause reverse application of the patch -- would be necessary to
keep a list of patched locations)
- x86_64 only

Additional squashes to this patch:
jk: add missing Kconfig.kgr
jk: fixup a header bug
jk: cleanup comments
js: port to new mcount infrastructure
js: order includes
js: fix for non-KGR (prototype and Kconfig fixes)
js: fix potential lock imbalance in kgr_patch_code
js: use insn helper for jmp generation
js: add \n to a printk
jk: externally_visible attribute warning fix
jk: symbol lookup failure handling
jk: fix race between patching and setting a flag (thanks to bpetkov)
js: add more sanity checking
js: handle missing kallsyms gracefully
js: use correct name, not alias
js: fix index in cleanup path
js: clear kgr_in_progress for all syscall paths
js: cleanup
js: do the checking in the process context
js: call kgr_mark_processes outside loop and locks
jk: convert from raw patching to ftrace API
jk: depend on regs-saving ftrace
js: make kgr_init an init_call
js: use correct offset for stub

Signed-off-by: Jiri Kosina <jkosina@xxxxxxx>
Signed-off-by: Jiri Slaby <jslaby@xxxxxxx>
Cc: Steven Rostedt <rostedt@xxxxxxxxxxx>
Cc: Frederic Weisbecker <fweisbec@xxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
---
arch/x86/Kconfig | 2 +
arch/x86/include/asm/kgr.h | 39 +++++
arch/x86/include/asm/thread_info.h | 1 +
arch/x86/kernel/asm-offsets.c | 1 +
arch/x86/kernel/entry_64.S | 3 +
arch/x86/kernel/x8664_ksyms_64.c | 1 +
include/linux/kgr.h | 71 +++++++++
kernel/Kconfig.kgr | 7 +
kernel/Makefile | 1 +
kernel/kgr.c | 308 +++++++++++++++++++++++++++++++++++++
10 files changed, 434 insertions(+)
create mode 100644 arch/x86/include/asm/kgr.h
create mode 100644 include/linux/kgr.h
create mode 100644 kernel/Kconfig.kgr
create mode 100644 kernel/kgr.c

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 25d2c6f7325e..789a4c870ab3 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -130,6 +130,7 @@ config X86
select HAVE_CC_STACKPROTECTOR
select GENERIC_CPU_AUTOPROBE
select HAVE_ARCH_AUDITSYSCALL
+ select HAVE_KGR

config INSTRUCTION_DECODER
def_bool y
@@ -263,6 +264,7 @@ config ARCH_SUPPORTS_UPROBES

source "init/Kconfig"
source "kernel/Kconfig.freezer"
+source "kernel/Kconfig.kgr"

menu "Processor type and features"

diff --git a/arch/x86/include/asm/kgr.h b/arch/x86/include/asm/kgr.h
new file mode 100644
index 000000000000..172f7b966bb5
--- /dev/null
+++ b/arch/x86/include/asm/kgr.h
@@ -0,0 +1,39 @@
+#ifndef ASM_KGR_H
+#define ASM_KGR_H
+
+#include <linux/linkage.h>
+
+/*
+ * KGR_STUB_ARCH_SLOW - define the "slow" ftrace callback for one patched
+ * function.
+ *
+ * Expands to a static function named <_new_function>_stub_slow with the
+ * ftrace callback signature (ip, parent_ip, ops, regs).  The stub needs to
+ * modify the RIP value stored in struct pt_regs so that ftrace redirects
+ * the execution properly:
+ *
+ *  - if the current task still has kgr_in_progress set in its thread_info
+ *    and has an mm, regs->ip is set to c->old + MCOUNT_INSN_SIZE, i.e. the
+ *    old code keeps running (presumably right behind its fentry call site);
+ *  - otherwise regs->ip is set to c->new, the location cached for the new
+ *    function.
+ *
+ * ops->private must point to a struct kgr_loc_caches.  Every invocation is
+ * logged with pr_info().  The _name argument is not used by the expansion.
+ */
+#define KGR_STUB_ARCH_SLOW(_name, _new_function) \
+static void _new_function ##_stub_slow (unsigned long ip, unsigned long parent_ip, \
+ struct ftrace_ops *ops, struct pt_regs *regs) \
+{ \
+ struct kgr_loc_caches *c = ops->private; \
+ \
+ if (task_thread_info(current)->kgr_in_progress && current->mm) {\
+ pr_info("kgr: slow stub: calling old code at %lx\n", \
+ c->old); \
+ regs->ip = c->old + MCOUNT_INSN_SIZE; \
+ } else { \
+ pr_info("kgr: slow stub: calling new code at %lx\n", \
+ c->new); \
+ regs->ip = c->new; \
+ } \
+}
+
+/*
+ * KGR_STUB_ARCH_FAST - define the "fast" ftrace callback for one patched
+ * function.
+ *
+ * Expands to a static function named <_new_function>_stub_fast with the
+ * ftrace callback signature.  Unlike the slow stub it performs no per-task
+ * check: it unconditionally sets regs->ip to c->new.  It BUG()s if c->new
+ * is zero and logs every invocation with pr_info().
+ *
+ * ops->private must point to a struct kgr_loc_caches.  The _name argument
+ * is not used by the expansion.
+ */
+#define KGR_STUB_ARCH_FAST(_name, _new_function) \
+static void _new_function ##_stub_fast (unsigned long ip, \
+ unsigned long parent_ip, struct ftrace_ops *ops, \
+ struct pt_regs *regs) \
+{ \
+ struct kgr_loc_caches *c = ops->private; \
+ \
+ BUG_ON(!c->new); \
+ pr_info("kgr: fast stub: calling new code at %lx\n", c->new); \
+ regs->ip = c->new; \
+}
+
+#endif
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 47e5de25ba79..1fdc144dcc9c 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -35,6 +35,7 @@ struct thread_info {
void __user *sysenter_return;
unsigned int sig_on_uaccess_error:1;
unsigned int uaccess_err:1; /* uaccess failed */
+ unsigned short kgr_in_progress;
};

#define INIT_THREAD_INFO(tsk) \
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 9f6b9341950f..0db0437967a2 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -32,6 +32,7 @@ void common(void) {
OFFSET(TI_flags, thread_info, flags);
OFFSET(TI_status, thread_info, status);
OFFSET(TI_addr_limit, thread_info, addr_limit);
+ OFFSET(TI_kgr_in_progress, thread_info, kgr_in_progress);

BLANK();
OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 1e96c3628bf2..a03b1e9d2de3 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -615,6 +615,7 @@ GLOBAL(system_call_after_swapgs)
movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
movq %rcx,RIP-ARGOFFSET(%rsp)
CFI_REL_OFFSET rip,RIP-ARGOFFSET
+ movw $0, TI_kgr_in_progress+THREAD_INFO(%rsp,RIP-ARGOFFSET)
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
jnz tracesys
system_call_fastpath:
@@ -639,6 +640,7 @@ sysret_check:
LOCKDEP_SYS_EXIT
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
+ movw $0, TI_kgr_in_progress+THREAD_INFO(%rsp,RIP-ARGOFFSET)
movl TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET),%edx
andl %edi,%edx
jnz sysret_careful
@@ -761,6 +763,7 @@ GLOBAL(int_ret_from_sys_call)
GLOBAL(int_with_check)
LOCKDEP_SYS_EXIT_IRQ
GET_THREAD_INFO(%rcx)
+ movw $0, TI_kgr_in_progress(%rcx)
movl TI_flags(%rcx),%edx
andl %edi,%edx
jnz int_careful
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 040681928e9d..df6425d44fa0 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -3,6 +3,7 @@

#include <linux/module.h>
#include <linux/smp.h>
+#include <linux/kgr.h>

#include <net/checksum.h>

diff --git a/include/linux/kgr.h b/include/linux/kgr.h
new file mode 100644
index 000000000000..d72add7f3d5d
--- /dev/null
+++ b/include/linux/kgr.h
@@ -0,0 +1,71 @@
+#ifndef LINUX_KGR_H
+#define LINUX_KGR_H
+
+#include <linux/init.h>
+#include <linux/ftrace.h>
+
+#include <asm/kgr.h>
+
+#ifdef CONFIG_KGR
+
+#define KGR_TIMEOUT 30
+#define KGR_DEBUG 1
+
+#ifdef KGR_DEBUG
+#define kgr_debug(args...) \
+ pr_info(args);
+#else
+#define kgr_debug(args...) { }
+#endif
+
+/*
+ * struct kgr_patch - a set of function replacements applied together.
+ *
+ * Passed to kgr_start_patching().  The patches[] array is walked until a
+ * NULL entry is found, so it must be terminated with KGR_PATCH_END.
+ */
+struct kgr_patch {
+ /* NOTE(review): not referenced by the code in this patch -- confirm purpose */
+ char reserved;
+ /* one entry per patched function */
+ const struct kgr_patch_fun {
+ /* symbol name of the function being replaced */
+ const char *name;
+ /* symbol name of the replacement function */
+ const char *new_name;
+ /* pointer to the replacement function */
+ void *new_function;
+ /* ftrace ops registered while tasks are still migrating */
+ struct ftrace_ops *ftrace_ops_slow;
+ /* ftrace ops registered by the finalization step */
+ struct ftrace_ops *ftrace_ops_fast;
+
+ } *patches[];
+};
+
+/*
+ * data structure holding locations of the source and target function
+ * fentry sites to avoid repeated lookups
+ *
+ * One instance is allocated per patched function and shared by its slow
+ * and fast ftrace_ops through ->private.
+ */
+struct kgr_loc_caches {
+ /* fentry location of the function being replaced */
+ unsigned long old;
+ /* fentry location of the replacement function */
+ unsigned long new;
+};
+
+/*
+ * KGR_PATCHED_FUNCTION - declare everything needed to replace one function.
+ *
+ * @patch:         not used by the expansion
+ * @_name:         name of the function to be replaced
+ * @_new_function: name of the replacement function
+ *
+ * Emits the slow and fast stubs, one static struct ftrace_ops for each of
+ * them (both with FTRACE_OPS_FL_SAVE_REGS, since the stubs rewrite
+ * regs->ip) and a static const struct kgr_patch_fun named
+ * __kgr_patch_<_name> that ties them together.  Reference that descriptor
+ * with KGR_PATCH(_name) when filling in struct kgr_patch.
+ *
+ * The expansion already ends with a semicolon.
+ */
+#define KGR_PATCHED_FUNCTION(patch, _name, _new_function) \
+ KGR_STUB_ARCH_SLOW(_name, _new_function); \
+ KGR_STUB_ARCH_FAST(_name, _new_function); \
+ extern void _new_function ## _stub_slow (unsigned long, unsigned long, \
+ struct ftrace_ops *, struct pt_regs *); \
+ extern void _new_function ## _stub_fast (unsigned long, unsigned long, \
+ struct ftrace_ops *, struct pt_regs *); \
+ static struct ftrace_ops __kgr_patch_ftrace_ops_slow_ ## _name = { \
+ .func = _new_function ## _stub_slow, \
+ .flags = FTRACE_OPS_FL_SAVE_REGS, \
+ }; \
+ static struct ftrace_ops __kgr_patch_ftrace_ops_fast_ ## _name = { \
+ .func = _new_function ## _stub_fast, \
+ .flags = FTRACE_OPS_FL_SAVE_REGS, \
+ }; \
+ static const struct kgr_patch_fun __kgr_patch_ ## _name = { \
+ .name = #_name, \
+ .new_name = #_new_function, \
+ .new_function = _new_function, \
+ .ftrace_ops_slow = &__kgr_patch_ftrace_ops_slow_ ## _name, \
+ .ftrace_ops_fast = &__kgr_patch_ftrace_ops_fast_ ## _name, \
+ }; \
+
+/* Address of the descriptor emitted by KGR_PATCHED_FUNCTION() for @name */
+#define KGR_PATCH(name) &__kgr_patch_ ## name
+/* Terminator for the patches[] array of struct kgr_patch */
+#define KGR_PATCH_END NULL
+
+extern int kgr_start_patching(const struct kgr_patch *);
+#endif /* CONFIG_KGR */
+
+#endif /* LINUX_KGR_H */
diff --git a/kernel/Kconfig.kgr b/kernel/Kconfig.kgr
new file mode 100644
index 000000000000..af9125f27b6d
--- /dev/null
+++ b/kernel/Kconfig.kgr
@@ -0,0 +1,7 @@
+# Selected by architectures that provide the kgr stubs (asm/kgr.h).
+config HAVE_KGR
+	bool
+
+# Must be bool, not tristate: include/linux/kgr.h is guarded by
+# "#ifdef CONFIG_KGR", which is not defined when the option is set to 'm'.
+config KGR
+	bool "Kgr infrastructure"
+	depends on DYNAMIC_FTRACE_WITH_REGS
+	depends on HAVE_KGR
+	help
+	  Infrastructure for ftrace-based runtime patching of kernel code.
+
+	  If unsure, say N.
diff --git a/kernel/Makefile b/kernel/Makefile
index f2a8b6246ce9..86ac7a2e5fe0 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -28,6 +28,7 @@ obj-y += printk/
obj-y += irq/
obj-y += rcu/

+obj-$(CONFIG_KGR) += kgr.o
obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o
obj-$(CONFIG_FREEZER) += freezer.o
obj-$(CONFIG_PROFILING) += profile.o
diff --git a/kernel/kgr.c b/kernel/kgr.c
new file mode 100644
index 000000000000..6f55c7654618
--- /dev/null
+++ b/kernel/kgr.c
@@ -0,0 +1,308 @@
+/*
+ * kGraft Online Kernel Patching
+ *
+ * Copyright (c) 2013-2014 SUSE
+ * Authors: Jiri Kosina
+ * Vojtech Pavlik
+ * Jiri Slaby
+ */
+
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <linux/ftrace.h>
+#include <linux/kallsyms.h>
+#include <linux/kgr.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/sort.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+
+/* forward declarations: used before their definitions below */
+static int kgr_patch_code(const struct kgr_patch_fun *patch_fun, bool final);
+static void kgr_work_fn(struct work_struct *work);
+
+/* workqueue running kgr_work; created in kgr_init() */
+static struct workqueue_struct *kgr_wq;
+/* periodic check whether patching can be finalized */
+static DECLARE_DELAYED_WORK(kgr_work, kgr_work_fn);
+/* taken around accesses to kgr_in_progress */
+static DEFINE_MUTEX(kgr_in_progress_lock);
+/* true from a successful kgr_start_patching() until finalization is done */
+static bool kgr_in_progress;
+/* set once kgr_init() has completed successfully */
+static bool kgr_initialized;
+/* the patch currently being applied; read by kgr_finalize() */
+static const struct kgr_patch *kgr_patch;
+
+/*
+ * Report whether any task with an mm still carries the kgr_in_progress mark.
+ *
+ * Walks the process list under tasklist_lock and logs every offender.
+ * Tasks without an mm (kernel threads) are skipped: their code paths are
+ * not supported yet and are silently ignored (TODO).
+ *
+ * NOTE(review): for_each_process() looks like it visits one task per
+ * process only -- confirm whether the other threads need checking too.
+ *
+ * Returns true if at least one marked task was found.
+ */
+static bool kgr_still_patching(void)
+{
+	struct task_struct *task;
+	bool pending = false;
+
+	read_lock(&tasklist_lock);
+	for_each_process(task) {
+		if (!task_thread_info(task)->kgr_in_progress || !task->mm)
+			continue;
+
+		pr_info("pid %d (%s) still in kernel after timeout\n",
+			task->pid, task->comm);
+		pending = true;
+	}
+	read_unlock(&tasklist_lock);
+
+	return pending;
+}
+
+/*
+ * Switch every function of the current patch (kgr_patch) to its fast stub.
+ *
+ * A failure for one function is only reported; the remaining functions are
+ * still processed and nothing is rolled back.
+ */
+static void kgr_finalize(void)
+{
+	const struct kgr_patch_fun *const *cur = kgr_patch->patches;
+
+	while (*cur) {
+		if (kgr_patch_code(*cur, true) < 0)
+			pr_err("kgr: finalize for %s failed, trying to continue\n",
+			       (*cur)->name);
+		cur++;
+	}
+}
+
+/*
+ * Delayed work: finalize the patch once no task is marked any more.
+ *
+ * While marked tasks remain, the work re-arms itself to look again after
+ * another KGR_TIMEOUT seconds.
+ */
+static void kgr_work_fn(struct work_struct *work)
+{
+	if (!kgr_still_patching()) {
+		/*
+		 * victory, patching finished, put everything back in shape
+		 * with as little performance impact as possible again
+		 */
+		pr_info("kgr succeeded\n");
+		kgr_finalize();
+
+		mutex_lock(&kgr_in_progress_lock);
+		kgr_in_progress = false;
+		mutex_unlock(&kgr_in_progress_lock);
+		return;
+	}
+
+	pr_info("kgr failed after timeout (%d), still in degraded mode\n",
+		KGR_TIMEOUT);
+	/* recheck again later */
+	queue_delayed_work(kgr_wq, &kgr_work, KGR_TIMEOUT * HZ);
+}
+
+/*
+ * Set kgr_in_progress in the thread_info of every task visited by
+ * for_each_process(), holding tasklist_lock for reading.
+ *
+ * NOTE(review): for_each_process() looks like it visits one task per
+ * process only -- confirm whether the other threads need marking too.
+ */
+static void kgr_mark_processes(void)
+{
+	struct task_struct *task;
+
+	read_lock(&tasklist_lock);
+	for_each_process(task) {
+		struct thread_info *ti = task_thread_info(task);
+
+		ti->kgr_in_progress = true;
+	}
+	read_unlock(&tasklist_lock);
+}
+
+/*
+ * Resolve the fentry location of the function called @f_name.
+ *
+ * The name is looked up through kallsyms, converted to its fentry site and
+ * the result is resolved back to a symbol to make sure it still lies
+ * within the intended function.
+ *
+ * Returns the fentry address on success.  On failure a negative errno
+ * (-EINVAL) is returned in the unsigned long; callers test it with
+ * IS_ERR_VALUE().
+ */
+static unsigned long kgr_get_fentry_loc(const char *f_name)
+{
+	unsigned long orig_addr, fentry_loc;
+	const char *check_name;
+	char check_buf[KSYM_SYMBOL_LEN];
+
+	orig_addr = kallsyms_lookup_name(f_name);
+	if (!orig_addr) {
+		WARN(1, "kgr: function %s not resolved ... kernel in inconsistent state\n",
+				f_name);
+		return -EINVAL;
+	}
+
+	fentry_loc = ftrace_function_to_fentry(orig_addr);
+	if (!fentry_loc) {
+		pr_err("kgr: fentry_loc not properly resolved\n");
+		return -EINVAL;
+	}
+
+	check_name = kallsyms_lookup(fentry_loc, NULL, NULL, NULL, check_buf);
+	/* the reverse lookup may find nothing; never hand NULL to strcmp() */
+	if (!check_name) {
+		pr_err("kgr: no symbol found for fentry location %lx of %s\n",
+				fentry_loc, f_name);
+		return -EINVAL;
+	}
+	if (strcmp(check_name, f_name)) {
+		pr_err("kgr: we got out of bounds the intended function (%s -> %s)\n",
+				f_name, check_name);
+		return -EINVAL;
+	}
+
+	return fentry_loc;
+}
+
+/*
+ * Allocate and fill the location cache shared by both stubs of @patch_fun.
+ *
+ * On success the cache is stored in ->private of the slow and the fast
+ * ftrace_ops and 0 is returned.  On failure a negative errno is returned,
+ * the allocation is released and ->private is left untouched.
+ */
+static int kgr_init_ftrace_ops(const struct kgr_patch_fun *patch_fun)
+{
+	struct kgr_loc_caches *caches;
+	unsigned long fentry_loc;
+	int err;
+
+	/*
+	 * Initialize the ftrace_ops->private with pointers to the fentry
+	 * sites of both old and new functions. This is used as a
+	 * redirection target in the per-arch stubs.
+	 *
+	 * Beware! -- freeing (once unloading will be implemented)
+	 * will require synchronize_sched() etc.
+	 */
+
+	caches = kmalloc(sizeof(*caches), GFP_KERNEL);
+	if (!caches) {
+		kgr_debug("kgr: unable to allocate fentry caches\n");
+		return -ENOMEM;
+	}
+
+	fentry_loc = kgr_get_fentry_loc(patch_fun->new_name);
+	if (IS_ERR_VALUE(fentry_loc)) {
+		kgr_debug("kgr: fentry location lookup failed\n");
+		err = fentry_loc;
+		goto err_free;
+	}
+	kgr_debug("kgr: storing %lx to caches->new for %s\n",
+			fentry_loc, patch_fun->new_name);
+	caches->new = fentry_loc;
+
+	fentry_loc = kgr_get_fentry_loc(patch_fun->name);
+	if (IS_ERR_VALUE(fentry_loc)) {
+		kgr_debug("kgr: fentry location lookup failed\n");
+		err = fentry_loc;
+		goto err_free;
+	}
+
+	kgr_debug("kgr: storing %lx to caches->old for %s\n",
+			fentry_loc, patch_fun->name);
+	caches->old = fentry_loc;
+
+	patch_fun->ftrace_ops_fast->private = caches;
+	patch_fun->ftrace_ops_slow->private = caches;
+
+	return 0;
+
+err_free:
+	/* nothing points at the cache yet, so it can be freed right away */
+	kfree(caches);
+	return err;
+}
+
+/*
+ * Redirect one function to its slow (@final == false) or fast
+ * (@final == true) stub.
+ *
+ * The initial pass also sets up the location cache shared by both stubs.
+ * The final pass registers the fast stub and then unregisters the slow one.
+ *
+ * Returns 0 on success or a negative errno.  If the initial pass fails, no
+ * stub has been registered and the location cache is released again.
+ */
+static int kgr_patch_code(const struct kgr_patch_fun *patch_fun, bool final)
+{
+	struct ftrace_ops *new_ops;
+	struct kgr_loc_caches *caches;
+	unsigned long fentry_loc;
+	int err;
+
+	/* Choose between slow and fast stub */
+	if (!final) {
+		err = kgr_init_ftrace_ops(patch_fun);
+		if (err)
+			return err;
+		kgr_debug("kgr: patching %s to slow stub\n", patch_fun->name);
+		new_ops = patch_fun->ftrace_ops_slow;
+	} else {
+		kgr_debug("kgr: patching %s to fast stub\n", patch_fun->name);
+		new_ops = patch_fun->ftrace_ops_fast;
+	}
+
+	/* Flip the switch */
+	caches = new_ops->private;
+	fentry_loc = caches->old;
+	err = ftrace_set_filter_ip(new_ops, fentry_loc, 0, 0);
+	if (err) {
+		kgr_debug("kgr: setting filter for %lx (%s) failed\n",
+				caches->old, patch_fun->name);
+		goto err_caches;
+	}
+
+	err = register_ftrace_function(new_ops);
+	if (err) {
+		kgr_debug("kgr: registering ftrace function for %lx (%s) failed\n",
+				caches->old, patch_fun->name);
+		goto err_caches;
+	}
+
+	/*
+	 * Get rid of the slow stub. Having two stubs in the interim is fine,
+	 * the last one always "wins", as it'll be dragged earlier from the
+	 * ftrace hashtable
+	 */
+	if (final) {
+		err = unregister_ftrace_function(patch_fun->ftrace_ops_slow);
+		if (err) {
+			kgr_debug("kgr: unregistering ftrace function for %lx (%s) failed\n",
+					fentry_loc, patch_fun->name);
+			return err;
+		}
+	}
+	kgr_debug("kgr: redirection for %lx (%s) done\n", fentry_loc,
+			patch_fun->name);
+
+	return 0;
+
+err_caches:
+	/*
+	 * In the initial pass the cache was allocated just above and no stub
+	 * using it has been registered, so drop it instead of leaking it.
+	 * In the final pass the slow stub is still registered and shares the
+	 * cache, so it must stay.
+	 */
+	if (!final) {
+		patch_fun->ftrace_ops_slow->private = NULL;
+		patch_fun->ftrace_ops_fast->private = NULL;
+		kfree(caches);
+	}
+	return err;
+}
+
+/**
+ * kgr_start_patching -- the entry for a kgraft patch
+ * @patch: patch to be applied
+ *
+ * Start patching of code that is neither running in IRQ context nor
+ * kernel thread.
+ *
+ * Every function of @patch is redirected to its slow stub, all processes
+ * are marked and a delayed work is scheduled to finalize the patch later.
+ *
+ * Return: 0 on success, -EINVAL if kgr is not initialized, -EAGAIN if
+ * another patch has not been finalized yet, or the error which made the
+ * redirection of one of the functions fail.
+ */
+int kgr_start_patching(const struct kgr_patch *patch)
+{
+	const struct kgr_patch_fun *const *patch_fun;
+
+	if (!kgr_initialized) {
+		pr_err("kgr: can't patch, not initialized\n");
+		return -EINVAL;
+	}
+
+	mutex_lock(&kgr_in_progress_lock);
+	if (kgr_in_progress) {
+		pr_err("kgr: can't patch, another patching not yet finalized\n");
+		mutex_unlock(&kgr_in_progress_lock);
+		return -EAGAIN;
+	}
+
+	for (patch_fun = patch->patches; *patch_fun; patch_fun++) {
+		int ret;
+
+		ret = kgr_patch_code(*patch_fun, false);
+		/*
+		 * In case any of the symbol resolutions in the set
+		 * has failed, patch all the previously replaced fentry
+		 * callsites back to nops and fail with grace
+		 */
+		if (ret < 0) {
+			/*
+			 * The entry that just failed never got its slow stub
+			 * registered, so only the entries before it are
+			 * undone.  Stepping back inside the loop body keeps
+			 * the pointer within the array.
+			 */
+			while (patch_fun != patch->patches) {
+				patch_fun--;
+				unregister_ftrace_function((*patch_fun)->ftrace_ops_slow);
+			}
+			mutex_unlock(&kgr_in_progress_lock);
+			return ret;
+		}
+	}
+	kgr_in_progress = true;
+	kgr_patch = patch;
+	mutex_unlock(&kgr_in_progress_lock);
+
+	kgr_mark_processes();
+
+	/*
+	 * give everyone time to exit kernel, and check after a while
+	 */
+	queue_delayed_work(kgr_wq, &kgr_work, KGR_TIMEOUT * HZ);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(kgr_start_patching);
+
+/*
+ * One-time setup: refuse to start when ftrace is dead, otherwise create
+ * the workqueue used for the delayed finalization check and flag kgr as
+ * initialized.
+ *
+ * Returns 0 on success, -ENODEV when ftrace is dead, -ENOMEM when the
+ * workqueue cannot be created.
+ */
+static int __init kgr_init(void)
+{
+	struct workqueue_struct *wq;
+
+	if (ftrace_is_dead()) {
+		pr_warning("kgr: enabled, but no fentry locations found ... aborting\n");
+		return -ENODEV;
+	}
+
+	wq = create_singlethread_workqueue("kgr");
+	kgr_wq = wq;
+	if (wq == NULL) {
+		pr_err("kgr: cannot allocate a work queue, aborting!\n");
+		return -ENOMEM;
+	}
+
+	kgr_initialized = true;
+	pr_info("kgr: successfully initialized\n");
+
+	return 0;
+}
+module_init(kgr_init);
--
1.9.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/