Re: [PATCH] perf: teach perf inject to merge sched_stat_* and sched_switch events (v3)

From: Andrey Wagin
Date: Fri Oct 12 2012 - 17:08:02 EST


Hello Frederic,

Could you give your opinion on this patch? It fixes all the bugs
that you mentioned in the previous version.

Thanks.

2012/9/18 Andrew Vagin <avagin@xxxxxxxxxx>:
> You may want to know where and for how long a task is sleeping. A callchain
> may be found in sched_switch and a time slice in stat_iowait, so I add
> a handler in perf inject for merging these events.
>
> My code saves the sched_switch event for each process and when it meets
> stat_iowait, it reports the sched_switch event, because this event
> contains a correct callchain. In other words, it replaces all
> stat_iowait events with the proper sched_switch events.
>
> v2: - remove the global variable "session"
> - handle errors from malloc()
>
> v3: - use sample->tid instead of sample->pid for merging events.
>
> Frederic Weisbecker noticed that this code works only in a root pidns.
> It's true, because a pid from trace content is not pid-namespace safe
> and there is currently no way to get this pid in the current pidns. This
> problem is more general, so I don't think that it should be solved in this
> series.
>
> Cc: Arnaldo Carvalho de Melo <acme@xxxxxxxxxxxxxxxxxx>
> Cc: Frederic Weisbecker <fweisbec@xxxxxxxxx>
> Cc: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
> Cc: Paul Mackerras <paulus@xxxxxxxxx>
> Cc: Ingo Molnar <mingo@xxxxxxxxxx>
> Cc: Andi Kleen <andi@xxxxxxxxxxxxxx>
> Cc: David Ahern <dsahern@xxxxxxxxx>
> Signed-off-by: Andrew Vagin <avagin@xxxxxxxxxx>
> ---
> tools/perf/Documentation/perf-inject.txt | 4 ++
> tools/perf/builtin-inject.c | 86 ++++++++++++++++++++++++++++++
> 2 files changed, 90 insertions(+), 0 deletions(-)
>
> diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt
> index 6be2101..c04e0c6 100644
> --- a/tools/perf/Documentation/perf-inject.txt
> +++ b/tools/perf/Documentation/perf-inject.txt
> @@ -35,6 +35,10 @@ OPTIONS
> -o::
> --output=::
> Output file name. (default: stdout)
> +-s::
> +--sched-stat::
> + Merge sched_stat and sched_switch for getting events where and how long
> + tasks slept.
>
> SEE ALSO
> --------
> diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
> index ed12b19..1783f0a 100644
> --- a/tools/perf/builtin-inject.c
> +++ b/tools/perf/builtin-inject.c
> @@ -8,11 +8,13 @@
> #include "builtin.h"
>
> #include "perf.h"
> +#include "util/evsel.h"
> #include "util/session.h"
> #include "util/tool.h"
> #include "util/debug.h"
>
> #include "util/parse-options.h"
> +#include "util/trace-event.h"
>
> static const char *input_name = "-";
> static const char *output_name = "-";
> @@ -21,6 +23,7 @@ static int output;
> static u64 bytes_written;
>
> static bool inject_build_ids;
> +static bool inject_sched_stat;
>
> static int perf_event__repipe_synth(struct perf_tool *tool __used,
> union perf_event *event,
> @@ -213,6 +216,83 @@ repipe:
> return 0;
> }
>
> +struct event_entry {
> + struct list_head node;
> + u32 tid;
> + union perf_event event[0];
> +};
> +
> +static LIST_HEAD(samples);
> +
> +static int perf_event__sched_stat(struct perf_tool *tool,
> + union perf_event *event,
> + struct perf_sample *sample,
> + struct perf_evsel *evsel,
> + struct machine *machine)
> +{
> + const char *evname = NULL;
> + uint32_t size;
> + struct event_entry *ent;
> + union perf_event *event_sw = NULL;
> + struct perf_sample sample_sw;
> + int sched_process_exit;
> +
> + size = event->header.size;
> +
> + evname = evsel->tp_format->name;
> +
> + sched_process_exit = !strcmp(evname, "sched_process_exit");
> +
> + if (!strcmp(evname, "sched_switch") || sched_process_exit) {
> + list_for_each_entry(ent, &samples, node)
> + if (sample->tid == ent->tid)
> + break;
> +
> + if (&ent->node != &samples) {
> + list_del(&ent->node);
> + free(ent);
> + }
> +
> + if (sched_process_exit)
> + return 0;
> +
> + ent = malloc(size + sizeof(struct event_entry));
> + if (ent == NULL)
> + die("malloc");
> + ent->tid = sample->tid;
> + memcpy(&ent->event, event, size);
> + list_add(&ent->node, &samples);
> + return 0;
> +
> + } else if (!strncmp(evname, "sched_stat_", 11)) {
> + u32 pid;
> +
> + pid = raw_field_value(evsel->tp_format,
> + "pid", sample->raw_data);
> +
> + list_for_each_entry(ent, &samples, node) {
> + if (pid == ent->tid)
> + break;
> + }
> +
> + if (&ent->node == &samples)
> + return 0;
> +
> + event_sw = &ent->event[0];
> + perf_evsel__parse_sample(evsel, event_sw, &sample_sw, false);
> +
> + sample_sw.period = sample->period;
> + sample_sw.time = sample->time;
> + perf_evsel__synthesize_sample(evsel, event_sw, &sample_sw, false);
> +
> + perf_event__repipe(tool, event_sw, &sample_sw, machine);
> + return 0;
> + }
> +
> + perf_event__repipe(tool, event, sample, machine);
> +
> + return 0;
> +}
> struct perf_tool perf_inject = {
> .sample = perf_event__repipe_sample,
> .mmap = perf_event__repipe,
> @@ -248,6 +328,9 @@ static int __cmd_inject(void)
> perf_inject.mmap = perf_event__repipe_mmap;
> perf_inject.fork = perf_event__repipe_task;
> perf_inject.tracing_data = perf_event__repipe_tracing_data;
> + } else if (inject_sched_stat) {
> + perf_inject.sample = perf_event__sched_stat;
> + perf_inject.ordered_samples = true;
> }
>
> session = perf_session__new(input_name, O_RDONLY, false, true, &perf_inject);
> @@ -275,6 +358,9 @@ static const char * const report_usage[] = {
> static const struct option options[] = {
> OPT_BOOLEAN('b', "build-ids", &inject_build_ids,
> "Inject build-ids into the output stream"),
> + OPT_BOOLEAN('s', "sched-stat", &inject_sched_stat,
> + "Merge sched-stat and sched-switch for getting events "
> + "where and how long tasks slept"),
> OPT_STRING('i', "input", &input_name, "file",
> "input file name"),
> OPT_STRING('o', "output", &output_name, "file",
> --
> 1.7.1
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/