Re: [PATCH 10/13] tracing/uprobes: Fetch args before reserving a ring buffer

From: Masami Hiramatsu
Date: Fri Aug 09 2013 - 05:57:10 EST


(2013/08/09 17:45), Namhyung Kim wrote:
> From: Namhyung Kim <namhyung.kim@xxxxxxx>
>
> Fetching from user space should be done in a non-atomic context. So
> use a temporary buffer and copy its content to the ring buffer
> atomically.
>
> While at it, use __get_data_size() and store_trace_args() to reduce
> code duplication.

My only concern is the use of kmalloc() in the event handler. For fetching
user memory which can be swapped out, a non-atomic context is indeed
required. But in most cases, we can presume that the data is already
resident in physical memory.
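
If the per-event kmalloc()/kfree() overhead turns out to matter, one
possible direction is a pre-allocated per-cpu buffer which the handler
locks while it fetches. Below is a completely untested sketch;
uprobe_cpu_buffer, uprobe_buffer_init(), uprobe_buffer_get() and
uprobe_buffer_put() are names I just made up, not existing API:

#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/smp.h>

struct uprobe_cpu_buffer {
        struct mutex    mutex;
        void            *buf;
};
static struct uprobe_cpu_buffer __percpu *uprobe_cpu_buffer;

static int uprobe_buffer_init(void)
{
        int cpu;

        uprobe_cpu_buffer = alloc_percpu(struct uprobe_cpu_buffer);
        if (!uprobe_cpu_buffer)
                return -ENOMEM;

        for_each_possible_cpu(cpu) {
                struct uprobe_cpu_buffer *ucb;

                ucb = per_cpu_ptr(uprobe_cpu_buffer, cpu);
                mutex_init(&ucb->mutex);
                ucb->buf = (void *)__get_free_page(GFP_KERNEL);
                if (!ucb->buf)
                        return -ENOMEM; /* error unwinding omitted in this sketch */
        }
        return 0;
}

static struct uprobe_cpu_buffer *uprobe_buffer_get(void)
{
        struct uprobe_cpu_buffer *ucb;

        /*
         * The handler runs in task context and may sleep (and migrate to
         * another cpu) while faulting in user pages, so a mutex rather
         * than preempt_disable() has to protect the buffer.
         */
        ucb = per_cpu_ptr(uprobe_cpu_buffer, raw_smp_processor_id());
        mutex_lock(&ucb->mutex);
        return ucb;
}

static void uprobe_buffer_put(struct uprobe_cpu_buffer *ucb)
{
        mutex_unlock(&ucb->mutex);
}

Then uprobe_trace_print()/uprobe_perf_print() could call
uprobe_buffer_get() before fetching and uprobe_buffer_put() after
committing, instead of allocating on every probe hit.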

I'd like to ask the opinions of Srikar and Oleg.

BTW, you'd better add to the patch description why this was not needed
previously, and why your series now needs it. :)
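
For example (this is just my understanding, and the snippet below is a
simplified illustration, not the actual fetch code): the existing fetch
methods only read the saved registers or the stack, which cannot fault,
so running them after reserving the ring buffer was fine; your series
adds fetch methods which dereference user memory, and those can fault
and sleep:

#include <linux/ptrace.h>
#include <linux/uaccess.h>

/* Register fetch: reads the saved pt_regs, cannot fault, so it is
 * safe even inside the reserve/commit critical section. */
static void fetch_from_regs(struct pt_regs *regs, unsigned int offset,
                            unsigned long *dest)
{
        *dest = regs_get_register(regs, offset);
}

/* User memory fetch: may fault and sleep, so it must happen before,
 * not inside, the reserve/commit critical section. */
static int fetch_from_user(unsigned long uaddr, void *dest, size_t size)
{
        if (copy_from_user(dest, (void __user *)uaddr, size))
                return -EFAULT;
        return 0;
}

Spelling that out in the description would make the reason for the
temporary buffer obvious.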

Thank you,

> Cc: Masami Hiramatsu <masami.hiramatsu.pt@xxxxxxxxxxx>
> Cc: Srikar Dronamraju <srikar@xxxxxxxxxxxxxxxxxx>
> Cc: Oleg Nesterov <oleg@xxxxxxxxxx>
> Cc: zhangwei(Jovi) <jovi.zhangwei@xxxxxxxxxx>
> Cc: Arnaldo Carvalho de Melo <acme@xxxxxxxxxxxxxxxxxx>
> Signed-off-by: Namhyung Kim <namhyung@xxxxxxxxxx>
> ---
> kernel/trace/trace_uprobe.c | 69 ++++++++++++++++++++++++++++++++++-----------
> 1 file changed, 53 insertions(+), 16 deletions(-)
>
> diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
> index f991cac2b9ba..2888b95b063f 100644
> --- a/kernel/trace/trace_uprobe.c
> +++ b/kernel/trace/trace_uprobe.c
> @@ -516,15 +516,31 @@ static void uprobe_trace_print(struct trace_uprobe *tu,
>          struct uprobe_trace_entry_head *entry;
>          struct ring_buffer_event *event;
>          struct ring_buffer *buffer;
> -        void *data;
> -        int size, i;
> +        void *data, *tmp;
> +        int size, dsize, esize;
>          struct ftrace_event_call *call = &tu->p.call;
>
> -        size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
> +        dsize = __get_data_size(&tu->p, regs);
> +        esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
> +
> +        /*
> +         * A temporary buffer is used for storing fetched data before reserving
> +         * the ring buffer because fetching from user space should be done in a
> +         * non-atomic context.
> +         */
> +        tmp = kmalloc(tu->p.size + dsize, GFP_KERNEL);
> +        if (tmp == NULL)
> +                return;
> +
> +        store_trace_args(esize, &tu->p, regs, tmp, dsize);
> +
> +        size = esize + tu->p.size + dsize;
>          event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
> -                                                  size + tu->p.size, 0, 0);
> -        if (!event)
> +                                                  size, 0, 0);
> +        if (!event) {
> +                kfree(tmp);
>                  return;
> +        }
>
>          entry = ring_buffer_event_data(event);
>          if (is_ret_probe(tu)) {
> @@ -536,13 +552,12 @@ static void uprobe_trace_print(struct trace_uprobe *tu,
>                  data = DATAOF_TRACE_ENTRY(entry, false);
>          }
>
> -        for (i = 0; i < tu->p.nr_args; i++) {
> -                call_fetch(&tu->p.args[i].fetch, regs,
> -                           data + tu->p.args[i].offset);
> -        }
> +        memcpy(data, tmp, tu->p.size + dsize);
>
>          if (!filter_current_check_discard(buffer, call, entry, event))
>                  trace_buffer_unlock_commit(buffer, event, 0, 0);
> +
> +        kfree(tmp);
>  }
>
>  /* uprobe handler */
> @@ -756,11 +771,30 @@ static void uprobe_perf_print(struct trace_uprobe *tu,
>          struct ftrace_event_call *call = &tu->p.call;
>          struct uprobe_trace_entry_head *entry;
>          struct hlist_head *head;
> -        void *data;
> -        int size, rctx, i;
> +        void *data, *tmp;
> +        int size, dsize, esize;
> +        int rctx;
> +
> +        dsize = __get_data_size(&tu->p, regs);
> +        esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
> +
> +        /*
> +         * A temporary buffer is used for storing fetched data before reserving
> +         * the ring buffer because fetching from user space should be done in a
> +         * non-atomic context.
> +         */
> +        tmp = kmalloc(tu->p.size + dsize, GFP_KERNEL);
> +        if (tmp == NULL)
> +                return;
>
> -        size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
> -        size = ALIGN(size + tu->p.size + sizeof(u32), sizeof(u64)) - sizeof(u32);
> +        store_trace_args(esize, &tu->p, regs, tmp, dsize);
> +
> +        size = esize + tu->p.size + dsize;
> +        size = ALIGN(size + sizeof(u32), sizeof(u64)) - sizeof(u32);
> +        if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough")) {
> +                kfree(tmp);
> +                return;
> +        }
>
>          preempt_disable();
>          head = this_cpu_ptr(call->perf_events);
> @@ -780,15 +814,18 @@ static void uprobe_perf_print(struct trace_uprobe *tu,
>                  data = DATAOF_TRACE_ENTRY(entry, false);
>          }
>
> -        for (i = 0; i < tu->p.nr_args; i++) {
> -                struct probe_arg *parg = &tu->p.args[i];
> +        memcpy(data, tmp, tu->p.size + dsize);
> +
> +        if (size - esize > tu->p.size + dsize) {
> +                int len = tu->p.size + dsize;
>
> -                call_fetch(&parg->fetch, regs, data + parg->offset);
> +                memset(data + len, 0, size - esize - len);
>          }
>
>          perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL);
>  out:
>          preempt_enable();
> +        kfree(tmp);
>  }
>
>  /* uprobe profile handler */
>


--
Masami HIRAMATSU
IT Management Research Dept. Linux Technology Center
Hitachi, Ltd., Yokohama Research Laboratory
E-mail: masami.hiramatsu.pt@xxxxxxxxxxx

