[PATCH] csky/ftrace: Add dynamic function tracer (including graph tracer)

From: guoren
Date: Sat Mar 16 2019 - 02:19:12 EST


From: Guo Ren <ren_guo@xxxxxxxxx>

Support dynamic ftrace, including the dynamic graph tracer. Gcc-csky with
-pg emits a call site in every function prologue, and we can use these
call sites to hook the trace function.

Original call site emitted by gcc with -pg:
push lr
jbsr _mcount
nop
nop
nop
nop

If the (callee - caller) offset is within bsr instruction range, we'll
patch the code to:
push lr
bsr _mcount
nop
nop
nop
nop
Otherwise, if the (callee - caller) offset is out of bsr range, we'll
patch the code to:
push lr
movih r26, ...
ori r26, ...
jsr r26

(r26 is reserved as the jsr link register in the csky ABIv2 spec.)
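
For illustration only, the stand-alone user-space sketch below mirrors the
patching decision made by make_jbsr() in this patch: within the +-64MB bsr
range the (callee - pc) offset is encoded in halfwords and split across two
16-bit words, otherwise we fall back to movih/ori/jsr through r26. The
opcode values come from the patch itself; main() and the sample addresses
are hypothetical and exist only so the sketch can be compiled and run:

/* Stand-alone sketch (user-space) of the call-site patching decision. */
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

#define NOP        0x4000	/* 16-bit nop */
#define PUSH_LR    0x14d0	/* push lr */
#define MOVIH_LINK 0xea3a	/* movih r26, imm16 (imm in next halfword) */
#define ORI_LINK   0xef5a	/* ori r26, imm16 (imm in next halfword) */
#define JSR_LINK   0xe8fa	/* jsr r26 */
#define BSR_LINK   0xe000	/* bsr, 26-bit pc-relative offset by 2 */

static void make_jbsr(unsigned long callee, unsigned long pc,
		      uint16_t *call, bool nolr)
{
	long offset = (long)callee - (long)pc;

	call[0] = nolr ? NOP : PUSH_LR;

	if (offset < -67108864 || offset > 67108864) {	/* beyond +-64MB */
		call[1] = MOVIH_LINK;
		call[2] = callee >> 16;
		call[3] = ORI_LINK;
		call[4] = callee & 0xffff;
		call[5] = JSR_LINK;
		call[6] = 0;
	} else {
		offset >>= 1;	/* bsr encodes the offset in halfwords */
		call[1] = BSR_LINK |
			  (uint16_t)(((unsigned long)offset >> 16) & 0x3ff);
		call[2] = (uint16_t)((unsigned long)offset & 0xffff);
		call[3] = call[4] = call[5] = call[6] = NOP;
	}
}

int main(void)
{
	uint16_t words[7];
	int i;

	/* hypothetical caller/callee addresses, well within bsr range */
	make_jbsr(0x80001000UL, 0x80002000UL, words, false);

	for (i = 0; i < 7; i++)
		printf("%04x ", words[i]);
	printf("\n");
	return 0;
}

In the patch, ftrace_make_call() only writes this 14-byte sequence after
ftrace_check_current_nop() has verified that the call site still holds the
original nops.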

Signed-off-by: Guo Ren <ren_guo@xxxxxxxxx>
---
arch/csky/Kconfig | 3 +-
arch/csky/abiv2/mcount.S | 43 +++++++++++-
arch/csky/include/asm/ftrace.h | 18 ++++-
arch/csky/kernel/ftrace.c | 148 ++++++++++++++++++++++++++++++++++++++++-
scripts/recordmcount.pl | 3 +
5 files changed, 209 insertions(+), 6 deletions(-)

diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
index 398113c..114cb2f 100644
--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -28,14 +28,15 @@ config CSKY
select GENERIC_SCHED_CLOCK
select GENERIC_SMP_IDLE_THREAD
select HAVE_ARCH_TRACEHOOK
+ select HAVE_DYNAMIC_FTRACE
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_GRAPH_TRACER
+ select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_GENERIC_DMA_COHERENT
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_LZO
select HAVE_KERNEL_LZMA
select HAVE_PERF_EVENTS
- select HAVE_C_RECORDMCOUNT
select HAVE_DMA_API_DEBUG
select HAVE_DMA_CONTIGUOUS
select MAY_HAVE_SPARSE_IRQ
diff --git a/arch/csky/abiv2/mcount.S b/arch/csky/abiv2/mcount.S
index c633379..7a66fed 100644
--- a/arch/csky/abiv2/mcount.S
+++ b/arch/csky/abiv2/mcount.S
@@ -61,10 +61,20 @@
addi sp, 16
.endm

+.macro nop_stub_6
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+.endm
+
ENTRY(ftrace_stub)
jmp lr
END(ftrace_stub)

+#ifndef CONFIG_DYNAMIC_FTRACE
ENTRY(_mcount)
mcount_enter

@@ -76,7 +86,7 @@ ENTRY(_mcount)
bf skip_ftrace

mov a0, lr
- subi a0, MCOUNT_INSN_SIZE
+ subi a0, 4
ldw a1, (sp, 24)

jsr r26
@@ -101,13 +111,42 @@ skip_ftrace:
mcount_exit
#endif
END(_mcount)
+#else /* CONFIG_DYNAMIC_FTRACE */
+ENTRY(_mcount)
+ mov t1, lr
+ ldw lr, (sp, 0)
+ addi sp, 4
+ jmp t1
+ENDPROC(_mcount)
+
+ENTRY(ftrace_caller)
+ mcount_enter
+
+ ldw a0, (sp, 16)
+ subi a0, 4
+ ldw a1, (sp, 24)
+
+ nop
+GLOBAL(ftrace_call)
+ nop_stub_6
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ nop
+GLOBAL(ftrace_graph_call)
+ nop_stub_6
+#endif
+
+ mcount_exit
+ENDPROC(ftrace_caller)
+#endif /* CONFIG_DYNAMIC_FTRACE */

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
ENTRY(ftrace_graph_caller)
mov a0, sp
addi a0, 24
ldw a1, (sp, 16)
- subi a1, MCOUNT_INSN_SIZE
+ subi a1, 4
mov a2, r8
lrw r26, prepare_ftrace_return
jsr r26
diff --git a/arch/csky/include/asm/ftrace.h b/arch/csky/include/asm/ftrace.h
index 7547c45..ba35d93 100644
--- a/arch/csky/include/asm/ftrace.h
+++ b/arch/csky/include/asm/ftrace.h
@@ -4,10 +4,26 @@
#ifndef __ASM_CSKY_FTRACE_H
#define __ASM_CSKY_FTRACE_H

-#define MCOUNT_INSN_SIZE 4
+#define MCOUNT_INSN_SIZE 14

#define HAVE_FUNCTION_GRAPH_FP_TEST

#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR

+#define MCOUNT_ADDR ((unsigned long)_mcount)
+
+#ifndef __ASSEMBLY__
+
+extern void _mcount(unsigned long);
+
+extern void ftrace_graph_call(void);
+
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+ return addr;
+}
+
+struct dyn_arch_ftrace {
+};
+#endif /* !__ASSEMBLY__ */
#endif /* __ASM_CSKY_FTRACE_H */
diff --git a/arch/csky/kernel/ftrace.c b/arch/csky/kernel/ftrace.c
index 274c431..ae3870e 100644
--- a/arch/csky/kernel/ftrace.c
+++ b/arch/csky/kernel/ftrace.c
@@ -3,6 +3,137 @@

#include <linux/ftrace.h>
#include <linux/uaccess.h>
+#include <asm/cacheflush.h>
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+#define NOP 0x4000
+#define PUSH_LR 0x14d0
+#define MOVIH_LINK 0xea3a
+#define ORI_LINK 0xef5a
+#define JSR_LINK 0xe8fa
+#define BSR_LINK 0xe000
+
+/*
+ * Gcc-csky with -pg will insert the following stub in the function prologue:
+ * push lr
+ * jbsr _mcount
+ * nop
+ * nop
+ * nop
+ * nop
+ *
+ * If the (callee - current_pc) is within the +-64MB bsr range, we'll use bsr:
+ * push lr
+ * bsr _mcount
+ * nop
+ * nop
+ * nop
+ * nop
+ * else we'll use (movih + ori + jsr):
+ * push lr
+ * movih r26, ...
+ * ori r26, ...
+ * jsr r26
+ *
+ * (r26 is our reserved link-reg)
+ *
+ */
+static inline void make_jbsr(unsigned long callee, unsigned long pc,
+ uint16_t *call, bool nolr)
+{
+ long offset;
+
+ call[0] = nolr ? NOP : PUSH_LR;
+
+ offset = (long) callee - (long) pc;
+
+ if (unlikely(offset < -67108864 || offset > 67108864)) {
+ call[1] = MOVIH_LINK;
+ call[2] = callee >> 16;
+ call[3] = ORI_LINK;
+ call[4] = callee & 0xffff;
+ call[5] = JSR_LINK;
+ call[6] = 0;
+ } else {
+ offset = offset >> 1;
+
+ call[1] = BSR_LINK |
+ ((uint16_t)((unsigned long) offset >> 16) & 0x3ff);
+ call[2] = (uint16_t)((unsigned long) offset & 0xffff);
+ call[3] = call[4] = call[5] = call[6] = NOP;
+ }
+}
+
+static int ftrace_check_current_nop(unsigned long hook)
+{
+ uint16_t olds[7];
+ uint16_t nops[7] = {NOP, NOP, NOP, NOP, NOP, NOP, NOP};
+ unsigned long hook_pos = hook - 2;
+
+ if (probe_kernel_read((void *)olds, (void *)hook_pos, sizeof(nops)))
+ return -EFAULT;
+
+ if (memcmp((void *)nops, (void *)olds, sizeof(nops))) {
+ pr_err("%p: nop but get (%04x %04x %04x %04x %04x %04x %04x)\n",
+ (void *)hook_pos,
+ olds[0], olds[1], olds[2], olds[3], olds[4], olds[5],
+ olds[6]);
+
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int ftrace_modify_code(unsigned long hook, unsigned long target,
+ bool enable, bool nolr)
+{
+ uint16_t call[7];
+ uint16_t nops[7] = {NOP, NOP, NOP, NOP, NOP, NOP, NOP};
+ unsigned long hook_pos = hook - 2;
+ int ret = 0;
+
+ make_jbsr(target, hook, call, nolr);
+
+ ret = probe_kernel_write((void *)hook_pos, enable ? call : nops,
+ sizeof(nops));
+ if (ret)
+ return -EPERM;
+
+ flush_icache_range(hook_pos, hook_pos + MCOUNT_INSN_SIZE);
+
+ return 0;
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ int ret = ftrace_check_current_nop(rec->ip);
+
+ if (ret)
+ return ret;
+
+ return ftrace_modify_code(rec->ip, addr, true, false);
+}
+
+int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
+ unsigned long addr)
+{
+ return ftrace_modify_code(rec->ip, addr, false, false);
+}
+
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+ int ret = ftrace_modify_code((unsigned long)&ftrace_call,
+ (unsigned long)func, true, true);
+ return ret;
+}
+
+int __init ftrace_dyn_arch_init(void)
+{
+ return 0;
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
@@ -43,8 +174,21 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
*(unsigned long *)frame_pointer = return_hooker;
}
}
-#endif
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+int ftrace_enable_ftrace_graph_caller(void)
+{
+ return ftrace_modify_code((unsigned long)&ftrace_graph_call,
+ (unsigned long)&ftrace_graph_caller, true, true);
+}
+
+int ftrace_disable_ftrace_graph_caller(void)
+{
+ return ftrace_modify_code((unsigned long)&ftrace_graph_call,
+ (unsigned long)&ftrace_graph_caller, false, true);
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */

/* _mcount is defined in abi's mcount.S */
-extern void _mcount(void);
EXPORT_SYMBOL(_mcount);
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index 68841d0..f716668 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -397,6 +397,9 @@ if ($arch eq "x86_64") {
} elsif ($arch eq "nds32") {
$mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_NDS32_HI20_RELA\\s+_mcount\$";
$alignment = 2;
+} elsif ($arch eq "csky") {
+ $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_CKCORE_PCREL_JSR_IMM26BY2\\s+_mcount\$";
+ $alignment = 2;
} else {
die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD";
}
--
2.7.4