Re: [PATCH] tracing/function-graph-tracer: tracing only syscallsmode

From: Steven Rostedt
Date: Mon Jan 05 2009 - 14:11:23 EST



On Tue, 30 Dec 2008, Frederic Weisbecker wrote:

> Impact: make more easy the syscalls tracing
>
> This patch extends to syscalls the features which let one to trace
> only one ore more function (and those they call).
> This way we can get rid of the workqueues, kernel threads, softirqs from
> ksoftirqd and only have the syscall path on the trace.
>
> Note that hardirq that interrupt the syscalls are still traced. But we could
> add an option to disable the interrupt tracing as well in the future.
>
> To use this new feature:
>
> echo function_graph > /debugfs/tracing/current_tracer
> echo syscalls > /debugfs/tracing/set_graph_function
>
> Signed-off-by: Frederic Weisbecker <fweisbec@xxxxxxxxx>
> ---
> arch/x86/include/asm/thread_info.h | 4 +-
> arch/x86/kernel/ptrace.c | 4 ++
> include/linux/ftrace.h | 104 ++++++++++++++++++++----------------
> kernel/trace/ftrace.c | 61 +++++++++++++++++++++
> kernel/trace/trace.h | 3 +-
> 5 files changed, 127 insertions(+), 49 deletions(-)
>
> diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
> index efdf938..447fa17 100644
> --- a/arch/x86/include/asm/thread_info.h
> +++ b/arch/x86/include/asm/thread_info.h
> @@ -94,6 +94,7 @@ struct thread_info {
> #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */
> #define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */
> #define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */
> +#define TIF_SYSCALL_FTRACE 27 /* For ftrace syscalls tracing */
>
> #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
> #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
> @@ -116,10 +117,11 @@ struct thread_info {
> #define _TIF_FORCED_TF (1 << TIF_FORCED_TF)
> #define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR)
> #define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR)
> +#define _TIF_SYSCALL_FTRACE (1 << TIF_SYSCALL_FTRACE)
>
> /* work to do in syscall_trace_enter() */
> #define _TIF_WORK_SYSCALL_ENTRY \
> - (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | \
> + (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_FTRACE |\
> _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | _TIF_SINGLESTEP)
>
> /* work to do in syscall_trace_leave() */
> diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
> index 0a5df5f..1a6e72f 100644
> --- a/arch/x86/kernel/ptrace.c
> +++ b/arch/x86/kernel/ptrace.c
> @@ -21,6 +21,7 @@
> #include <linux/audit.h>
> #include <linux/seccomp.h>
> #include <linux/signal.h>
> +#include <linux/ftrace.h>
>
> #include <asm/uaccess.h>
> #include <asm/pgtable.h>
> @@ -1397,6 +1398,9 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs)
> {
> long ret = 0;
>
> + /* Tell the function graph tracer we are entering a system call */
> + ftrace_graph_syscall_enter();
> +

You would not mind me converting this to a tracepoint so that other
tracers could tap into this.

> /*
> * If we stepped into a sysenter/syscall insn, it trapped in
> * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
> diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> index 677432b..4f22902 100644
> --- a/include/linux/ftrace.h
> +++ b/include/linux/ftrace.h
> @@ -357,6 +357,52 @@ static inline void trace_power_end(struct power_trace *it) { }
> #endif
>
>
> +#ifdef CONFIG_TRACING
> +#include <linux/sched.h>
> +
> +/* flags for current->trace */
> +enum {
> + TSK_TRACE_FL_TRACE_BIT = 0,
> + TSK_TRACE_FL_GRAPH_BIT = 1,
> +};
> +enum {
> + TSK_TRACE_FL_TRACE = 1 << TSK_TRACE_FL_TRACE_BIT,
> + TSK_TRACE_FL_GRAPH = 1 << TSK_TRACE_FL_GRAPH_BIT,
> +};
> +
> +static inline void set_tsk_trace_trace(struct task_struct *tsk)
> +{
> + set_bit(TSK_TRACE_FL_TRACE_BIT, &tsk->trace);
> +}
> +
> +static inline void clear_tsk_trace_trace(struct task_struct *tsk)
> +{
> + clear_bit(TSK_TRACE_FL_TRACE_BIT, &tsk->trace);
> +}
> +
> +static inline int test_tsk_trace_trace(struct task_struct *tsk)
> +{
> + return tsk->trace & TSK_TRACE_FL_TRACE;
> +}
> +
> +static inline void set_tsk_trace_graph(struct task_struct *tsk)
> +{
> + set_bit(TSK_TRACE_FL_GRAPH_BIT, &tsk->trace);
> +}
> +
> +static inline void clear_tsk_trace_graph(struct task_struct *tsk)
> +{
> + clear_bit(TSK_TRACE_FL_GRAPH_BIT, &tsk->trace);
> +}
> +
> +static inline int test_tsk_trace_graph(struct task_struct *tsk)
> +{
> + return tsk->trace & TSK_TRACE_FL_GRAPH;
> +}
> +
> +#endif /* CONFIG_TRACING */
> +
> +
> /*
> * Structure that defines an entry function trace.
> */
> @@ -430,7 +476,15 @@ static inline void unpause_graph_tracing(void)
> {
> atomic_dec(&current->tracing_graph_pause);
> }
> -#else
> +
> +extern bool ftrace_graph_syscalls;
> +
> +static inline void ftrace_graph_syscall_enter(void)
> +{
> + if (ftrace_graph_syscalls)
> + set_tsk_trace_graph(current);

We could eliminate this global variable with the tracepoint.

> +}
> +#else /* !CONFIG_FUNCTION_GRAPH_TRACER */
>
> #define __notrace_funcgraph
> #define __irq_entry
> @@ -445,51 +499,7 @@ static inline int task_curr_ret_stack(struct task_struct *tsk)
>
> static inline void pause_graph_tracing(void) { }
> static inline void unpause_graph_tracing(void) { }
> -#endif
> -
> -#ifdef CONFIG_TRACING
> -#include <linux/sched.h>
> -
> -/* flags for current->trace */
> -enum {
> - TSK_TRACE_FL_TRACE_BIT = 0,
> - TSK_TRACE_FL_GRAPH_BIT = 1,
> -};
> -enum {
> - TSK_TRACE_FL_TRACE = 1 << TSK_TRACE_FL_TRACE_BIT,
> - TSK_TRACE_FL_GRAPH = 1 << TSK_TRACE_FL_GRAPH_BIT,
> -};
> -
> -static inline void set_tsk_trace_trace(struct task_struct *tsk)
> -{
> - set_bit(TSK_TRACE_FL_TRACE_BIT, &tsk->trace);
> -}
> -
> -static inline void clear_tsk_trace_trace(struct task_struct *tsk)
> -{
> - clear_bit(TSK_TRACE_FL_TRACE_BIT, &tsk->trace);
> -}
> -
> -static inline int test_tsk_trace_trace(struct task_struct *tsk)
> -{
> - return tsk->trace & TSK_TRACE_FL_TRACE;
> -}
> -
> -static inline void set_tsk_trace_graph(struct task_struct *tsk)
> -{
> - set_bit(TSK_TRACE_FL_GRAPH_BIT, &tsk->trace);
> -}
> -
> -static inline void clear_tsk_trace_graph(struct task_struct *tsk)
> -{
> - clear_bit(TSK_TRACE_FL_GRAPH_BIT, &tsk->trace);
> -}
> -
> -static inline int test_tsk_trace_graph(struct task_struct *tsk)
> -{
> - return tsk->trace & TSK_TRACE_FL_GRAPH;
> -}
> -
> -#endif /* CONFIG_TRACING */
> +static inline void ftrace_graph_syscall_enter(void) { }
> +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
>
> #endif /* _LINUX_FTRACE_H */
> diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
> index 2f32969..9ed22ee 100644
> --- a/kernel/trace/ftrace.c
> +++ b/kernel/trace/ftrace.c
> @@ -1338,6 +1338,40 @@ static DEFINE_MUTEX(graph_lock);
>
> int ftrace_graph_count;
> unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
> +bool ftrace_graph_syscalls;
> +static char *syscall_str = "syscalls";
> +
> +/* Tell ptrace that we trace all syscalls from all tasks */
> +static void ftrace_graph_syscalls_start(void)
> +{
> + struct task_struct *g, *t;
> + unsigned long flags;
> +
> + ftrace_graph_syscalls = true;

If we keep this global, it would be better to have it after you set the
TIF_SYSCALL_FTRACE flag, otherwise your traces might look funny. With some
tasks being traced and others not.

-- Steve

> +
> + read_lock_irqsave(&tasklist_lock, flags);
> + do_each_thread(g, t) {
> + set_tsk_thread_flag(t, TIF_SYSCALL_FTRACE);
> + } while_each_thread(g, t);
> +
> + read_unlock_irqrestore(&tasklist_lock, flags);
> +}
> +
> +/* Remove the syscall flag trace for all tasks */
> +static void ftrace_graph_syscalls_stop(void)
> +{
> + struct task_struct *g, *t;
> + unsigned long flags;
> +
> + ftrace_graph_syscalls = false;
> +
> + read_lock_irqsave(&tasklist_lock, flags);
> + do_each_thread(g, t) {
> + clear_tsk_thread_flag(t, TIF_SYSCALL_FTRACE);
> + } while_each_thread(g, t);
> +
> + read_unlock_irqrestore(&tasklist_lock, flags);
> +}
>
> static void *
> g_next(struct seq_file *m, void *v, loff_t *pos)
> @@ -1359,6 +1393,13 @@ static void *g_start(struct seq_file *m, loff_t *pos)
>
> mutex_lock(&graph_lock);
>
> + /*
> + * Syscalls tracing is special and is not on the
> + * ftrace_graph_funcs tab. Print it as the first entry.
> + */
> + if (!*pos && ftrace_graph_syscalls)
> + return syscall_str;
> +
> p = g_next(m, p, pos);
>
> return p;
> @@ -1377,6 +1418,11 @@ static int g_show(struct seq_file *m, void *v)
> if (!ptr)
> return 0;
>
> + if (v == syscall_str) {
> + seq_printf(m, "%s\n", syscall_str);
> + return 0;
> + }
> +
> kallsyms_lookup(*ptr, NULL, NULL, NULL, str);
>
> seq_printf(m, "%s\n", str);
> @@ -1403,6 +1449,7 @@ ftrace_graph_open(struct inode *inode, struct file *file)
> if ((file->f_mode & FMODE_WRITE) &&
> !(file->f_flags & O_APPEND)) {
> ftrace_graph_count = 0;
> + ftrace_graph_syscalls_stop();
> memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs));
> }
>
> @@ -1533,6 +1580,15 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
> }
> buffer[index] = 0;
>
> + /*
> + * Catch the special syscall tracing request,
> + * it is not enabled like other functions.
> + */
> + if (!strcmp(syscall_str, buffer)) {
> + ftrace_graph_syscalls_start();
> + goto out_syscalls;
> + }
> +
> /* we allow only one at a time */
> ret = ftrace_set_func(array, ftrace_graph_count, buffer);
> if (ret)
> @@ -1540,6 +1596,7 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
>
> ftrace_graph_count++;
>
> +out_syscalls:
> file->f_pos += read;
>
> ret = read;
> @@ -2091,6 +2148,10 @@ void ftrace_graph_init_task(struct task_struct *t)
> t->curr_ret_stack = -1;
> atomic_set(&t->tracing_graph_pause, 0);
> atomic_set(&t->trace_overrun, 0);
> +
> + if (ftrace_graph_syscalls)
> + set_tsk_thread_flag(t, TIF_SYSCALL_FTRACE);
> +
> } else
> t->ret_stack = NULL;
> }
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index a8b624c..f372fbb 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -521,7 +521,8 @@ static inline int ftrace_graph_addr(unsigned long addr)
> {
> int i;
>
> - if (!ftrace_graph_count || test_tsk_trace_graph(current))
> + if ((!ftrace_graph_count && !ftrace_graph_syscalls)
> + || test_tsk_trace_graph(current))
> return 1;
>
> for (i = 0; i < ftrace_graph_count; i++) {
> --
> 1.6.0.4
>
>
>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/