Re: [PATCH 7/9] ftrace, perf: Add support to use functiontracepoint in perf

From: Steven Rostedt
Date: Mon Nov 28 2011 - 14:58:41 EST


On Sun, 2011-11-27 at 19:04 +0100, Jiri Olsa wrote:
> Adding perf registration support for the ftrace function event,
> so it is now possible to register it via perf interface.
>
> The perf_event struct statically contains ftrace_ops as a handle
> for function tracer. The function tracer is registered/unregistered
> in open/close actions, and enabled/disabled in add/del actions.
>
> It is now possible to use function trace within perf commands
> like:
>
> perf record -e ftrace:function ls
> perf stat -e ftrace:function ls

Question. This is a root only command, correct? Otherwise, we are
allowing any user to create a large performance impact to the system.

>
> Signed-off-by: Jiri Olsa <jolsa@xxxxxxxxxx>
> ---
> include/linux/perf_event.h | 3 +
> kernel/trace/trace_event_perf.c | 88 +++++++++++++++++++++++++++++++++++++++
> kernel/trace/trace_export.c | 23 ++++++++++
> 3 files changed, 114 insertions(+), 0 deletions(-)
>
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 1e9ebe5..6071995 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -847,6 +847,9 @@ struct perf_event {
> #ifdef CONFIG_EVENT_TRACING
> struct ftrace_event_call *tp_event;
> struct event_filter *filter;
> +#ifdef CONFIG_FUNCTION_TRACER
> + struct ftrace_ops ftrace_ops;
> +#endif
> #endif
>
> #ifdef CONFIG_CGROUP_PERF
> diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
> index d72af0b..4be0f73 100644
> --- a/kernel/trace/trace_event_perf.c
> +++ b/kernel/trace/trace_event_perf.c
> @@ -250,3 +250,91 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
> return raw_data;
> }
> EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
> +
> +
> +static void
> +perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip)
> +{
> + struct ftrace_entry *entry;
> + struct hlist_head *head;
> + struct pt_regs regs;
> + int rctx;
> +
> +#define ENTRY_SIZE (ALIGN(sizeof(struct ftrace_entry) + sizeof(u32), \
> + sizeof(u64)) - sizeof(u32))
> +
> + BUILD_BUG_ON(ENTRY_SIZE > PERF_MAX_TRACE_SIZE);
> +
> + perf_fetch_caller_regs(&regs);
> +
> + entry = perf_trace_buf_prepare(ENTRY_SIZE, TRACE_FN, NULL, &rctx);
> + if (!entry)
> + return;
> +
> + entry->ip = ip;
> + entry->parent_ip = parent_ip;
> +
> + head = this_cpu_ptr(event_function.perf_events);
> + perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0,
> + 1, &regs, head);
> +
> +#undef ENTRY_SIZE
> +}
> +
> +static int perf_ftrace_function_register(struct perf_event *event)
> +{
> + struct ftrace_ops *ops = &event->ftrace_ops;
> +
> + ops->flags |= FTRACE_OPS_FL_CONTROL;
> + atomic_set(&ops->disabled, 1);
> + ops->func = perf_ftrace_function_call;
> + return register_ftrace_function(ops);

When is ADD called? Because as soon as you register this function, even
though you have it "disabled" the system takes about a 13% impact on
performance just by calling this.

> +}
> +
> +static int perf_ftrace_function_unregister(struct perf_event *event)
> +{
> + struct ftrace_ops *ops = &event->ftrace_ops;
> + return unregister_ftrace_function(ops);
> +}
> +
> +static void perf_ftrace_function_enable(struct perf_event *event)
> +{
> + struct ftrace_ops *ops = &event->ftrace_ops;
> + enable_ftrace_function(ops);

Is it really an issue that we shouldn't call the full blown register
instead? I'm not really understanding why this is a problem. Note, one
of the improvements to ftrace in the near future is to enable ftrace
without stop_machine.

-- Steve

> +}
> +
> +static void perf_ftrace_function_disable(struct perf_event *event)
> +{
> + struct ftrace_ops *ops = &event->ftrace_ops;
> + disable_ftrace_function(ops);
> +}
> +
> +int perf_ftrace_event_register(struct ftrace_event_call *call,
> + enum trace_reg type, void *data)
> +{
> + int etype = call->event.type;
> +
> + if (etype != TRACE_FN)
> + return -EINVAL;
> +
> + switch (type) {
> + case TRACE_REG_REGISTER:
> + case TRACE_REG_UNREGISTER:
> + break;
> + case TRACE_REG_PERF_REGISTER:
> + case TRACE_REG_PERF_UNREGISTER:
> + return 0;
> + case TRACE_REG_PERF_OPEN:
> + return perf_ftrace_function_register(data);
> + case TRACE_REG_PERF_CLOSE:
> + return perf_ftrace_function_unregister(data);
> + case TRACE_REG_PERF_ADD:
> + perf_ftrace_function_enable(data);
> + return 0;
> + case TRACE_REG_PERF_DEL:
> + perf_ftrace_function_disable(data);
> + return 0;
> + }
> +
> + return -EINVAL;
> +}
> diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
> index bbeec31..62e86a5 100644
> --- a/kernel/trace/trace_export.c
> +++ b/kernel/trace/trace_export.c
> @@ -131,6 +131,28 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
>
> #include "trace_entries.h"
>
> +static int ftrace_event_class_register(struct ftrace_event_call *call,
> + enum trace_reg type, void *data)
> +{
> + switch (type) {
> + case TRACE_REG_PERF_REGISTER:
> + case TRACE_REG_PERF_UNREGISTER:
> + return 0;
> + case TRACE_REG_PERF_OPEN:
> + case TRACE_REG_PERF_CLOSE:
> + case TRACE_REG_PERF_ADD:
> + case TRACE_REG_PERF_DEL:
> +#ifdef CONFIG_PERF_EVENTS
> + return perf_ftrace_event_register(call, type, data);
> +#endif
> + case TRACE_REG_REGISTER:
> + case TRACE_REG_UNREGISTER:
> + break;
> + }
> +
> + return -EINVAL;
> +}
> +
> #undef __entry
> #define __entry REC
>
> @@ -159,6 +181,7 @@ struct ftrace_event_class event_class_ftrace_##call = { \
> .system = __stringify(TRACE_SYSTEM), \
> .define_fields = ftrace_define_fields_##call, \
> .fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\
> + .reg = ftrace_event_class_register, \
> }; \
> \
> struct ftrace_event_call __used event_##call = { \


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/