Re: [PATCH] perf tools: Fix tracing info recording

From: Frederic Weisbecker
Date: Wed Sep 21 2011 - 11:30:34 EST


On Wed, Sep 14, 2011 at 03:58:40PM +0200, Jiri Olsa wrote:
> The tracing information is part of the perf data file. It contains
> several files from within the tracing debugfs and procs directories.
>
> Beside some static header files, for each tracing event the format
> file is added. The /proc/kallsyms file is also added.
>
> The tracing data are stored with preceeding size. This is causing some
> dificulties for pipe output, since there's no way to tell debugfs/proc
> file size before reading it. So, for pipe output, all the debugfs files
> were read twice. Once to get the overall size and once to store the
> content itself. This can cause problem in case any of these file
> changed, within the storage time.
>
> Fixing this behaviour by using temp file in case of pipe output. The
> debugfs/proc files are being read only once, ensuring the integrity of
> the tracing data.
>
> Also changing the way the event files are searched by quering specified
> event files directly, instead of walking the events directory.
>
> Signed-off-by: Jiri Olsa <jolsa@xxxxxxxxxx>

Looks good overall, but I have some comments about details:

> ---
> tools/perf/util/header.c | 39 ++++-
> tools/perf/util/parse-events.h | 1 +
> tools/perf/util/trace-event-info.c | 333 +++++++++++++++++++-----------------
> tools/perf/util/trace-event.h | 7 +
> 4 files changed, 215 insertions(+), 165 deletions(-)
>
> diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
> index b6c1ad1..6a9fd5b 100644
> --- a/tools/perf/util/header.c
> +++ b/tools/perf/util/header.c
> @@ -387,13 +387,21 @@ static int perf_header__adds_write(struct perf_header *header,
>
> if (perf_header__has_feat(header, HEADER_TRACE_INFO)) {
> struct perf_file_section *trace_sec;
> + struct tracing_data *tdata;
>
> trace_sec = &feat_sec[idx++];
> -
> - /* Write trace info */
> trace_sec->offset = lseek(fd, 0, SEEK_CUR);
> - read_tracing_data(fd, &evlist->entries);
> - trace_sec->size = lseek(fd, 0, SEEK_CUR) - trace_sec->offset;
> +
> + /*
> + * We work over the real file, we can write data
> + * directly, not temp file is needed.

s/not/no

May be also briefly explain why we can write directly here. The fact
we have reserved space to write the size in the headers already.

> + */
> + tdata = tracing_data_get(&evlist->entries, fd, false);
> + if (!tdata)
> + goto out_free;
> +
> + trace_sec->size = tracing_data_size(tdata);
> + tracing_data_put(tdata);
> }
>
> if (perf_header__has_feat(header, HEADER_BUILD_ID)) {
> @@ -1100,15 +1108,25 @@ int perf_event__synthesize_tracing_data(int fd, struct perf_evlist *evlist,
> struct perf_session *session __unused)
> {
> union perf_event ev;
> + struct tracing_data *tdata;
> ssize_t size = 0, aligned_size = 0, padding;
> int err __used = 0;
>
> + /*
> + * The fd descriptor is pipe, se we need to:
> + * - write the tracing data to the temp file
> + * - get/write the data size to pipe
> + * - write the tracing data from the temp file
> + * to the pipe
> + */

That also needs a brief explanation on why we need to do that.


> + tdata = tracing_data_get(&evlist->entries, fd, true);
> + if (!tdata)
> + return -1;
> +
> memset(&ev, 0, sizeof(ev));
>
> ev.tracing_data.header.type = PERF_RECORD_HEADER_TRACING_DATA;
> - size = read_tracing_data_size(fd, &evlist->entries);
> - if (size <= 0)
> - return size;
> + size = tracing_data_size(tdata);
> aligned_size = ALIGN(size, sizeof(u64));
> padding = aligned_size - size;
> ev.tracing_data.header.size = sizeof(ev.tracing_data);
<snip>
> -int read_tracing_data(int fd, struct list_head *pattrs)
> +#define FILENAME_SIZE 50
> +
> +struct tracing_data {
> + ssize_t size;
> + bool temp;
> + char temp_file[FILENAME_SIZE];
> +};
> +
> +ssize_t tracing_data_size(struct tracing_data *td)
> {
> - char buf[BUFSIZ];
> - struct tracepoint_path *tps = get_tracepoints_path(pattrs);
> + return td->size;
> +}

May be inline it. Or rather directly access td->size from calling code.

>
> - /*
> - * What? No tracepoints? No sense writing anything here, bail out.
> - */
> - if (tps == NULL)
> - return -1;
> +static char *get_format_file(char *sys, char *name)
> +{
> + char *file, *path;
>
> - output_fd = fd;
> + path = get_tracing_file("events");
> + if (!path)
> + return NULL;
> +
> + file = malloc_or_die(strlen(path) + strlen(sys) +
> + strlen(name) + strlen("format") + 10);

Why "10" ?

> +
> + sprintf(file, "%s/%s/%s/format", path, sys, name);
> + return file;
> +}
> +
> +static void put_format_file(char *file)
> +{
> + free(file);
> +}
> +
> +/*
> + * Walk tracepoint event objects and store them.
> + * Only those matching the sys parameter are stored
> + * and marked as done.
> + */
> +static void read_event_files_system(struct tracepoint_path *tps,
> + const char *sys)
> +{
> + off_t count_pos;
> + u32 count = 0;
> +
> + /* specified events count under single system */
> + count_pos = lseek(output_fd, 0, SEEK_CUR);
> + write_or_die(&count, 4);
> +
> + while (tps) {
> + if ((!tps->done) &&
> + (!strcmp(sys, tps->system))) {
> + char *file;
> +
> + file = get_format_file(tps->system, tps->name);
> + if (file) {
> + record_file(file, 8);
> + count++;
> + }
> +
> + put_format_file(file);
> + tps->done = 1;
> + }
> +
> + tps = tps->next;
> + }
> +
> + if (pwrite(output_fd, &count, 4, count_pos) < 0)
> + die("pwrite");
> +}
> +
> +/*
> + * We have all needed tracepoint event files stored in
> + * the tracepoint_path objects.
> + *
> + * - first we store ftrace system events
> + * - then we walk all '!done' event objects
> + * and process them
> + */
> +static void read_event_files(struct tracepoint_path *tps)
> +{
> + off_t count_pos;
> + u32 count = 0;
> +
> + read_event_files_system(tps, "ftrace");
> +
> + /* systems count */
> + count_pos = lseek(output_fd, 0, SEEK_CUR);
> + write_or_die(&count, 4);
> +
> + while (tps) {
> + if (!tps->done) {
> + write_or_die(tps->system, strlen(tps->system) + 1);
> + read_event_files_system(tps, tps->system);
> + count++;
> + }
> +
> + tps = tps->next;
> + }
>
> + if (pwrite(output_fd, &count, 4, count_pos) < 0)
> + die("write");
> +}

There seem to be a significant reorganization of the code in that file
that seem to overlap the initial scope of this patch. Should
it go to a seperate single purpose patch?

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/