[RFC][PROTO][PATCH -tip 6/7] kprobes: x86: support kprobes jump optimization on x86

From: Masami Hiramatsu
Date: Mon Apr 06 2009 - 17:43:44 EST


Introduce x86 arch-specific jump optimization code, which supports both
x86-32 and x86-64. When a kprobe can be optimized, the breakpoint at the
probed address is replaced with a relative jump into an out-of-line detour
buffer; the buffer saves the registers, calls optimized_callback() to run
the probe's pre_handler, executes a copy of the original instructions, and
then jumps back to the instruction following the copied region.
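
As a quick reference for reviewers, both the jump placed at the probe site
and the call/jump synthesized inside the detour buffer are ordinary
rel32-displacement instructions, with the displacement measured from the end
of the 5-byte instruction. Below is a minimal, illustrative user-space sketch
of that encoding (the helper name encode_rel_insn is hypothetical; the
kernel-side equivalent is __synthesize_relative_insn() in this patch):

	#include <stdint.h>
	#include <string.h>

	/* Illustrative only: emit "jmp rel32" (0xe9) or "call rel32" (0xe8)
	 * at 'from', targeting 'to'.  The 32-bit displacement is relative to
	 * the address of the instruction following the 5-byte jump/call. */
	static void encode_rel_insn(uint8_t *from, const uint8_t *to, uint8_t opcode)
	{
		int32_t raddr = (int32_t)((long)to - ((long)from + 5));

		from[0] = opcode;                        /* RELATIVEJUMP/CALL_OPCODE */
		memcpy(&from[1], &raddr, sizeof(raddr)); /* rel32 displacement */
	}

For example, encode_rel_insn(site, detour_buf, 0xe9) produces the same bytes
that arch_optimize_kprobe() pokes over the probed instruction.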

Signed-off-by: Masami Hiramatsu <mhiramat@xxxxxxxxxx>
---

arch/x86/Kconfig | 1
arch/x86/include/asm/kprobes.h | 25 +++-
arch/x86/kernel/kprobes.c | 280 ++++++++++++++++++++++++++++++++++++++--
3 files changed, 290 insertions(+), 16 deletions(-)


diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index eebd3ad..feca11f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -29,6 +29,7 @@ config X86
select ARCH_WANT_OPTIONAL_GPIOLIB
select ARCH_WANT_FRAME_POINTERS
select HAVE_KRETPROBES
+ select HAVE_OPTPROBES
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_DYNAMIC_FTRACE
select HAVE_FUNCTION_TRACER
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 4fe681d..492458a 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -32,7 +32,10 @@ struct kprobe;

typedef u8 kprobe_opcode_t;
#define BREAKPOINT_INSTRUCTION 0xcc
-#define RELATIVEJUMP_INSTRUCTION 0xe9
+#define RELATIVEJUMP_OPCODE 0xe9
+#define RELATIVECALL_OPCODE 0xe8
+#define RELATIVE_ADDR_SIZE 4
+#define RELATIVE_JUMP_SIZE (sizeof(kprobe_opcode_t) + RELATIVE_ADDR_SIZE)
#define MAX_INSN_SIZE 16
#define MAX_STACK_SIZE 64
#define MIN_STACK_SIZE(ADDR) \
@@ -44,6 +47,17 @@ typedef u8 kprobe_opcode_t;

#define flush_insn_slot(p) do { } while (0)

+/* optinsn template addresses */
+extern kprobe_opcode_t optprobe_template_entry;
+extern kprobe_opcode_t optprobe_template_val;
+extern kprobe_opcode_t optprobe_template_call;
+extern kprobe_opcode_t optprobe_template_end;
+#define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + RELATIVE_ADDR_SIZE)
+#define MAX_OPTINSN_SIZE \
+ (((unsigned long)&optprobe_template_end - \
+ (unsigned long)&optprobe_template_entry) + \
+ MAX_OPTIMIZED_LENGTH + RELATIVE_JUMP_SIZE)
+
extern const int kretprobe_blacklist_size;

void arch_remove_kprobe(struct kprobe *p);
@@ -64,6 +78,15 @@ struct arch_specific_insn {
int boostable;
};

+struct arch_optimized_insn {
+ /* copy of the original instructions */
+ kprobe_opcode_t copied_insn[RELATIVE_ADDR_SIZE];
+ /* detour code buffer */
+ kprobe_opcode_t *insn;
+ /* length of copied instructions */
+ int length;
+};
+
struct prev_kprobe {
struct kprobe *kp;
unsigned long status;
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index fcce435..5635e02 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -161,16 +161,36 @@ struct kretprobe_blackpoint kretprobe_blacklist[] = {
};
const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist);

-/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/
-static void __kprobes set_jmp_op(void *from, void *to)
+/*
+ * On Pentium-series processors, unsynchronized cross-modifying code
+ * operations can cause unexpected instruction execution results.
+ * So after the code is modified, we should synchronize it on each processor.
+ */
+static void __local_serialize_cpu(void *info)
{
- struct __arch_jmp_op {
- char op;
+ sync_core();
+}
+
+void arch_serialize_cpus(void)
+{
+ on_each_cpu(__local_serialize_cpu, NULL, 1);
+}
+
+static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op)
+{
+ struct __arch_relative_insn {
+ u8 op;
s32 raddr;
- } __attribute__((packed)) * jop;
- jop = (struct __arch_jmp_op *)from;
- jop->raddr = (s32)((long)(to) - ((long)(from) + 5));
- jop->op = RELATIVEJUMP_INSTRUCTION;
+ } __attribute__((packed)) *insn;
+ insn = (struct __arch_relative_insn *)from;
+ insn->raddr = (s32)((long)(to) - ((long)(from) + 5));
+ insn->op = op;
+}
+
+/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/
+static void __kprobes synthesize_reljump(void *from, void *to)
+{
+ __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE);
}

/*
@@ -326,10 +346,10 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
* If not, return null.
* Only applicable to 64-bit x86.
*/
-static void __kprobes fix_riprel(struct kprobe *p)
+static void __kprobes fix_riprel(unsigned long ssol, unsigned long orig)
{
#ifdef CONFIG_X86_64
- u8 *insn = p->ainsn.insn;
+ u8 *insn = (u8 *)ssol;
s64 disp;
int need_modrm;

@@ -386,8 +406,8 @@ static void __kprobes fix_riprel(struct kprobe *p)
* sign-extension of the original signed 32-bit
* displacement would have given.
*/
- disp = (u8 *) p->addr + *((s32 *) insn) -
- (u8 *) p->ainsn.insn;
+ disp = (u8 *) orig + *((s32 *) insn) -
+ (u8 *) ssol;
BUG_ON((s64) (s32) disp != disp); /* Sanity check. */
*(s32 *)insn = (s32) disp;
}
@@ -399,7 +419,7 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p)
{
memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));

- fix_riprel(p);
+ fix_riprel((unsigned long)p->ainsn.insn, (unsigned long)p->addr);

if (can_boost(p->addr))
p->ainsn.boostable = 0;
@@ -895,8 +915,8 @@ static void __kprobes resume_execution(struct kprobe *p,
* These instructions can be executed directly if it
* jumps back to correct address.
*/
- set_jmp_op((void *)regs->ip,
- (void *)orig_ip + (regs->ip - copy_ip));
+ synthesize_reljump((void *)regs->ip,
+ (void *)orig_ip + (regs->ip - copy_ip));
p->ainsn.boostable = 1;
} else {
p->ainsn.boostable = -1;
@@ -1117,6 +1137,236 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
return 0;
}

+
+#ifdef CONFIG_OPTPROBES
+
+/* Insert a call instruction at address 'from', which calls address 'to'.*/
+static void __kprobes synthesize_relcall(void *from, void *to)
+{
+ __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE);
+}
+
+/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
+static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr,
+ unsigned long val)
+{
+#ifdef CONFIG_X86_64
+ *addr++ = 0x48;
+ *addr++ = 0xbf;
+#else
+ *addr++ = 0xb8;
+#endif
+ *(unsigned long *)addr = val;
+}
+
+void __kprobes kprobes_optinsn_template_holder(void)
+{
+ asm volatile (
+ ".global optprobe_template_entry\n"
+ "optprobe_template_entry: \n"
+#ifdef CONFIG_X86_64
+ /* We don't bother saving the ss register */
+ " pushq %rsp\n"
+ " pushfq\n"
+ SAVE_REGS_STRING
+ " movq %rsp, %rsi\n"
+ ".global optprobe_template_val\n"
+ "optprobe_template_val: \n"
+ ASM_NOP5
+ ASM_NOP5
+ ".global optprobe_template_call\n"
+ "optprobe_template_call: \n"
+ ASM_NOP5
+ RESTORE_REGS_STRING
+ " popfq\n"
+ /* Skip rsp */
+ " addq $8, %rsp\n"
+#else /* CONFIG_X86_32 */
+ " pushf\n"
+ SAVE_REGS_STRING
+ " movl %esp, %edx\n"
+ ".global optprobe_template_val\n"
+ "optprobe_template_val: \n"
+ ASM_NOP5
+ ".global optprobe_template_call\n"
+ "optprobe_template_call: \n"
+ ASM_NOP5
+ RESTORE_REGS_STRING
+ " addl $4, %esp\n" /* skip cs */
+ " popf\n"
+#endif
+ ".global optprobe_template_end\n"
+ "optprobe_template_end: \n");
+}
+
+/* Optimized kprobe callback function, called from the detour code (optinsn) */
+static void optimized_callback(struct optimized_kprobe *op,
+ struct pt_regs *regs)
+{
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ preempt_disable();
+ if (kprobe_running()) {
+ kprobes_inc_nmissed_count(&op->kp);
+ } else {
+ /* save skipped registers */
+#ifdef CONFIG_X86_64
+ regs->cs = __KERNEL_CS;
+#else
+ regs->cs = __KERNEL_CS | get_kernel_rpl();
+ regs->gs = 0;
+#endif
+ regs->ip = (unsigned long)op->kp.addr;
+ regs->orig_ax = ~0UL;
+
+ __get_cpu_var(current_kprobe) = &op->kp;
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+ aggr_pre_handler(&op->kp, regs);
+ __get_cpu_var(current_kprobe) = NULL;
+ }
+ preempt_enable_no_resched();
+}
+
+
+#define TMPL_MOVE_IDX \
+ ((long)&optprobe_template_val - (long)&optprobe_template_entry)
+#define TMPL_CALL_IDX \
+ ((long)&optprobe_template_call - (long)&optprobe_template_entry)
+#define TMPL_END_IDX \
+ ((long)&optprobe_template_end - (long)&optprobe_template_entry)
+
+#define INT3_SIZE sizeof(kprobe_opcode_t)
+
+static int __kprobes prepare_copied_insn(u8 *buf, struct optimized_kprobe *op)
+{
+ struct insn insn;
+ int len = 0;
+ while (len < RELATIVE_JUMP_SIZE) {
+ if (!can_boost(buf + len))
+ return -EINVAL;
+ fix_riprel((unsigned long)buf + len,
+ (unsigned long)op->kp.addr);
+ insn_init(&insn, buf + len, 0);
+ insn_get_length(&insn);
+ len += insn.length;
+ }
+ return len;
+}
+
+int arch_optimized_kprobe_address(struct optimized_kprobe *op,
+ unsigned long addr)
+{
+ return ((addr > (unsigned long)op->kp.addr) &&
+ (addr < (unsigned long)op->kp.addr + op->optinsn.length));
+}
+
+/*
+ * Prepare the out-of-line (detour) buffer: copy the template and the
+ * original instructions. Target instructions MUST be relocatable.
+ */
+int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
+{
+ u8 *buf;
+ int ret, i;
+
+ op->optinsn.insn = get_optinsn_slot();
+ if (!op->optinsn.insn)
+ return -ENOMEM;
+
+ buf = (u8 *)op->optinsn.insn;
+
+ /* Copy the arch-dependent detour code from the template */
+ memcpy(buf, &optprobe_template_entry, TMPL_END_IDX);
+
+ /* set probe information */
+ synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);
+
+ /* set probe function call */
+ synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback);
+
+ /* copy instructions into the out-of-line buffer */
+ memcpy(buf + TMPL_END_IDX, op->kp.addr, MAX_OPTIMIZED_LENGTH);
+
+ /* Overwrite the copied int3 with the original first opcode byte */
+ memcpy(buf + TMPL_END_IDX, &op->kp.opcode, INT3_SIZE);
+
+ /* Back up the instruction bytes which will be overwritten by the jump's destination address */
+ memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
+ RELATIVE_ADDR_SIZE);
+
+ ret = prepare_copied_insn(buf + TMPL_END_IDX, op);
+ if (ret < 0)
+ goto error;
+
+ op->optinsn.length = ret;
+ /* Check whether another kprobe is registered within the copied region */
+ for (i = 1; i < op->optinsn.length; i++)
+ if (get_kprobe(op->kp.addr + i)) {
+ ret = -EEXIST;
+ goto error;
+ }
+
+ /* Set a returning jmp instruction at the tail of the out-of-line buffer */
+ synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.length,
+ (u8 *)op->kp.addr + op->optinsn.length);
+
+ flush_icache_range((unsigned long) buf,
+ (unsigned long) buf + TMPL_END_IDX +
+ op->optinsn.length + RELATIVE_JUMP_SIZE);
+ return 0;
+error:
+ free_optinsn_slot(op->optinsn.insn, 0);
+ return ret;
+}
+
+void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op)
+{
+ if (op->optinsn.insn)
+ free_optinsn_slot(op->optinsn.insn, 0);
+}
+
+int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op)
+{
+ kprobe_opcode_t opcode = RELATIVEJUMP_OPCODE;
+ long rel = (long)(op->optinsn.insn) -
+ ((long)(op->kp.addr) + RELATIVE_JUMP_SIZE);
+ /* TODO: check safety */
+
+ /* First, write only the destination address; the first byte is still int3 */
+ text_poke((void *)((char *)op->kp.addr + INT3_SIZE), &rel,
+ RELATIVE_ADDR_SIZE);
+ arch_serialize_cpus();
+
+ /* Then overwrite the breakpoint with the reljump opcode */
+ text_poke(op->kp.addr, &opcode, sizeof(kprobe_opcode_t));
+ arch_serialize_cpus();
+ return 0;
+}
+
+void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op)
+{
+ /* Change (the first byte of) the jump back to int3. */
+ arch_arm_kprobe(&op->kp);
+ arch_serialize_cpus();
+ /*
+ * Recover the instruction bytes overwritten by the jump's destination address.
+ * The int3 itself will be removed by arch_disarm_kprobe().
+ */
+ text_poke((void *)((long)op->kp.addr + INT3_SIZE),
+ (void *)op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
+}
+
+/* djprobe handler: switch execution to the bypass (detour) code */
+int __kprobes arch_detour_optimized_kprobe(struct optimized_kprobe *op,
+ struct pt_regs *regs)
+{
+ regs->ip = (unsigned long)op->optinsn.insn;
+ reset_current_kprobe();
+ preempt_enable_no_resched();
+ return 1; /* already prepared */
+}
+#endif
+
int __init arch_init_kprobes(void)
{
return 0;
--
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America) Inc.
Software Solutions Division

e-mail: mhiramat@xxxxxxxxxx
