Re: [PATCH v2 03/11] tracing: add soft disable for syscall events

From: zhangwei(Jovi)
Date: Sat Jun 29 2013 - 02:27:31 EST


On 2013/6/29 13:08, Tom Zanussi wrote:
> Add support for SOFT_DISABLE to syscall events.
>
> The original SOFT_DISABLE patches didn't add support for soft disable
> of syscall events; this adds it and paves the way for future patches
> allowing triggers to be added to syscall events, since triggers are
> built on top of SOFT_DISABLE.
>
> The existing code grabs the trace_array from the ftrace_file passed to
> the event registration functions and passes that to the probe
> functions. Passing the file instead allows the probe functions to
> access not only the trace_array attached to the file but the flags as
> well.
>
> Signed-off-by: Tom Zanussi <tom.zanussi@xxxxxxxxxxxxxxx>
> ---
> kernel/trace/trace_syscalls.c | 20 ++++++++++++++------
> 1 file changed, 14 insertions(+), 6 deletions(-)
>
> diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
> index 8f2ac73..1765088 100644
> --- a/kernel/trace/trace_syscalls.c
> +++ b/kernel/trace/trace_syscalls.c
> @@ -301,7 +301,8 @@ static int __init syscall_exit_define_fields(struct ftrace_event_call *call)
>
> static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
> {
> - struct trace_array *tr = data;
> + struct ftrace_event_file *ftrace_file = data;
> + struct trace_array *tr = ftrace_file->tr;
> struct syscall_trace_enter *entry;
> struct syscall_metadata *sys_data;
> struct ring_buffer_event *event;
> @@ -319,6 +320,9 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
> if (!sys_data)
> return;
>
> + if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &ftrace_file->flags))
> + return;
> +
> size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
>
> buffer = tr->trace_buffer.buffer;
> @@ -338,7 +342,8 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
>
> static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
> {
> - struct trace_array *tr = data;
> + struct ftrace_event_file *ftrace_file = data;
> + struct trace_array *tr = ftrace_file->tr;
> struct syscall_trace_exit *entry;
> struct syscall_metadata *sys_data;
> struct ring_buffer_event *event;
> @@ -355,6 +360,9 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
> if (!sys_data)
> return;
>
> + if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &ftrace_file->flags))
> + return;
> +
> buffer = tr->trace_buffer.buffer;
> event = trace_buffer_lock_reserve(buffer,
> sys_data->exit_event->event.type, sizeof(*entry), 0, 0);
> @@ -382,7 +390,7 @@ static int reg_event_syscall_enter(struct ftrace_event_file *file,
> return -ENOSYS;
> mutex_lock(&syscall_trace_lock);
> if (!tr->sys_refcount_enter)
> - ret = register_trace_sys_enter(ftrace_syscall_enter, tr);
> + ret = register_trace_sys_enter(ftrace_syscall_enter, file);
> if (!ret) {
> set_bit(num, tr->enabled_enter_syscalls);
> tr->sys_refcount_enter++;

Can this change work correctly?

It seems that all syscalls in the same tr will use the same ftrace_event_file
(the first one registered, since the probe is only registered when the refcount
is zero) in ftrace_syscall_enter/ftrace_syscall_exit. Obviously this is wrong.

Basically, I think we still need to pass tr into register_trace_sys_enter/exit,
for performance reasons. If you use ftrace_event_file as the argument, then when
you run the command 'perf stat -e syscalls:* -a sleep 10',
it will loop over NR_SYSCALLS tracepoints for every syscall enter and exit,
which is unacceptable.

Thanks.

> @@ -404,7 +412,7 @@ static void unreg_event_syscall_enter(struct ftrace_event_file *file,
> tr->sys_refcount_enter--;
> clear_bit(num, tr->enabled_enter_syscalls);
> if (!tr->sys_refcount_enter)
> - unregister_trace_sys_enter(ftrace_syscall_enter, tr);
> + unregister_trace_sys_enter(ftrace_syscall_enter, file);
> mutex_unlock(&syscall_trace_lock);
> }
>
> @@ -420,7 +428,7 @@ static int reg_event_syscall_exit(struct ftrace_event_file *file,
> return -ENOSYS;
> mutex_lock(&syscall_trace_lock);
> if (!tr->sys_refcount_exit)
> - ret = register_trace_sys_exit(ftrace_syscall_exit, tr);
> + ret = register_trace_sys_exit(ftrace_syscall_exit, file);
> if (!ret) {
> set_bit(num, tr->enabled_exit_syscalls);
> tr->sys_refcount_exit++;
> @@ -442,7 +450,7 @@ static void unreg_event_syscall_exit(struct ftrace_event_file *file,
> tr->sys_refcount_exit--;
> clear_bit(num, tr->enabled_exit_syscalls);
> if (!tr->sys_refcount_exit)
> - unregister_trace_sys_exit(ftrace_syscall_exit, tr);
> + unregister_trace_sys_exit(ftrace_syscall_exit, file);
> mutex_unlock(&syscall_trace_lock);
> }
>
>


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/