Re: [PATCH] perf report: auto-detect branch stack sampling mode

From: Stephane Eranian
Date: Fri Feb 24 2012 - 10:28:33 EST


On Fri, Feb 24, 2012 at 4:24 PM, David Ahern <dsahern@xxxxxxxxx> wrote:
> On 2/24/12 2:40 AM, Stephane Eranian wrote:
>>
>>
>> This patch adds auto-detection of samples with taken branch stacks.
>> The auto-detection avoids having to specify the -b or --branch-stack
>> option on the cmdline.
>>
>> The patch adds a new feature bit HEADER_BRANCH_STACK to mark the
>> presence of branch stacks in samples.
>>
>> You can now do:
>> $ perf record -b any noploop 2
>> $ perf report
>> # Events: 8K cycles
>> #
>> # Overhead  Command  Source Shared Object     Source Symbol  Target
>> Shared Object    Target Symbol
>> # ........  .......  ....................  ...................
>>  ....................  ..................
>> #
>>   91.56%  noploop  noploop        [.] noploop
>>   noploop  [.] noploop
>>      0.42%  noploop  [kernel.kallsyms]     [k] __lock_acquire
>>  [kernel.kallsyms]  [k] __lock_acquire
>>
>>
>> To force regular reporting based on the instruction address:
>> $ perf report --no-branch-stack
>> #
>> # Events: 2K cycles
>> #
>> # Overhead  Command     Shared Object              Symbol
>> # ........  .......  .................  ...............................
>> #
>>   92.03%  noploop  noploop       [.] noploop
>>      1.00%  noploop  [kernel.kallsyms]  [k] lock_acquire
>>
>>
>> Signed-off-by: Stephane Eranian<eranian@xxxxxxxxxx>
>> ---
>>
>> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
>> index 1c49d4e..5e833a2 100644
>> --- a/tools/perf/builtin-record.c
>> +++ b/tools/perf/builtin-record.c
>> @@ -473,6 +473,9 @@ static int __cmd_record(struct perf_record *rec, int
>> argc, const char **argv)
>> Â Â Â Âif (!have_tracepoints(&evsel_list->entries))
>> Â Â Â Â Â Â Â Âperf_header__clear_feat(&session->header,
>> HEADER_TRACE_INFO);
>>
>> + Â Â Â if (!rec->opts.branch_stack)
>> + Â Â Â Â Â Â Â perf_header__clear_feat(&session->header,
>> HEADER_BRANCH_STACK);
>
>
> branch tracing is user requested on, so shouldn't feature default off and
> only be enabled when requested?
>
Well, what Ingo was suggesting is that perf report auto-detects whether or
not branch mode is necessary by looking at the perf.data file. Most likely
if you've recorded with -b, you are interested in a branch mode view rather
than the instruction view (default). So all this does is eliminate the need
to pass -b to perf report to enable branch mode.

> David
>
>
>> +
>> Â Â Â Âif (!rec->file_new) {
>> Â Â Â Â Â Â Â Âerr = perf_session__read_header(session, output);
>> Â Â Â Â Â Â Â Âif (err< Â0)
>> diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
>> index 528789f..edd4289 100644
>> --- a/tools/perf/builtin-report.c
>> +++ b/tools/perf/builtin-report.c
>> @@ -306,21 +306,14 @@ static int __cmd_report(struct perf_report *rep)
>> Â{
>> Â Â Â Âint ret = -EINVAL;
>> Â Â Â Âu64 nr_samples;
>> - Â Â Â struct perf_session *session;
>> Â Â Â Âstruct perf_evsel *pos;
>> + Â Â Â struct perf_session *session = rep->session;
>> Â Â Â Âstruct map *kernel_map;
>> Â Â Â Âstruct kmap *kernel_kmap;
>> Â Â Â Âconst char *help = "For a higher level overview, try: perf report
>> --sort comm,dso";
>>
>> Â Â Â Âsignal(SIGINT, sig_handler);
>>
>> - Â Â Â session = perf_session__new(rep->input_name, O_RDONLY,
>> - Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â rep->force, false,&rep->tool);
>>
>> - Â Â Â if (session == NULL)
>> - Â Â Â Â Â Â Â return -ENOMEM;
>> -
>> - Â Â Â rep->session = session;
>> -
>> Â Â Â Âif (rep->cpu_list) {
>> Â Â Â Â Â Â Â Âret = perf_session__cpu_bitmap(session, rep->cpu_list,
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â rep->cpu_bitmap);
>> @@ -489,7 +482,10 @@ parse_callchain_opt(const struct option *opt, const
>> char *arg, int unset)
>>
>> Âint cmd_report(int argc, const char **argv, const char *prefix __used)
>> Â{
>> + Â Â Â struct perf_session *session;
>> Â Â Â Âstruct stat st;
>> + Â Â Â bool has_br_stack;
>> + Â Â Â int ret = -1;
>> Â Â Â Âchar callchain_default_opt[] = "fractal,0.5,callee";
>> Â Â Â Âconst char * const report_usage[] = {
>> Â Â Â Â Â Â Â Â"perf report [<options>]",
>> @@ -600,7 +596,23 @@ int cmd_report(int argc, const char **argv, const
>> char *prefix __used)
>> Â Â Â Â Â Â Â Â Â Â Â Âreport.input_name = "perf.data";
>> Â Â Â Â}
>>
>> - Â Â Â if (sort__branch_mode) {
>> + Â Â Â session = perf_session__new(report.input_name, O_RDONLY,
>> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â report.force, false,&report.tool);
>> + Â Â Â if (session == NULL)
>> + Â Â Â Â Â Â Â return -ENOMEM;
>> +
>> + Â Â Â report.session = session;
>> +
>> + Â Â Â has_br_stack = perf_header__has_feat(&session->header,
>> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â ÂHEADER_BRANCH_STACK);
>> +
>> + Â Â Â /*
>> + Â Â Â Â* if branch mode set by user via -b or --branch-stack
>> + Â Â Â Â* or not forced off by user (-no-branch-stack) user and present
>> + Â Â Â Â* in the file then we set branch mode
>> + Â Â Â Â*/
>> + Â Â Â if (sort__branch_mode || (sort__branch_mode == -1&&
>> Âhas_br_stack)) {
>>
>> + Â Â Â Â Â Â Â sort__branch_mode = true;
>> Â Â Â Â Â Â Â Âif (use_browser)
>> Â Â Â Â Â Â Â Â Â Â Â Âfprintf(stderr, "Warning: TUI interface not
>> supported"
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â" in branch mode\n");
>> @@ -657,13 +669,13 @@ int cmd_report(int argc, const char **argv, const
>> char *prefix __used)
>> Â Â Â Â}
>>
>> Â Â Â Âif (symbol__init()< Â0)
>> - Â Â Â Â Â Â Â return -1;
>> + Â Â Â Â Â Â Â goto error;
>>
>> Â Â Â Âsetup_sorting(report_usage, options);
>>
>> Â Â Â Âif (parent_pattern != default_parent_pattern) {
>> Â Â Â Â Â Â Â Âif (sort_dimension__add("parent")< Â0)
>> - Â Â Â Â Â Â Â Â Â Â Â return -1;
>> + Â Â Â Â Â Â Â Â Â Â Â goto error;
>>
>> Â Â Â Â Â Â Â Â/*
>> Â Â Â Â Â Â Â Â * Only show the parent fields if we explicitly
>> @@ -685,5 +697,8 @@ int cmd_report(int argc, const char **argv, const char
>> *prefix __used)
>> Â Â Â Âsort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm",
>> stdout);
>> Â Â Â Âsort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol",
>> stdout);
>>
>> - Â Â Â return __cmd_report(&report);
>> + Â Â Â ret = __cmd_report(&report);
>> +error:
>> + Â Â Â perf_session__delete(session);
>> + Â Â Â return ret;
>> Â}
>> diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
>> index c851495..c22491e 100644
>> --- a/tools/perf/util/header.c
>> +++ b/tools/perf/util/header.c
>> @@ -1023,6 +1023,12 @@ static int write_cpuid(int fd, struct perf_header
>> *h __used,
>> Â Â Â Âreturn do_write_string(fd, buffer);
>> Â}
>>
>> +static int write_branch_stack(int fd __used, struct perf_header *h
>> __used,
>> + Â Â Â Â Â Â Â Â Â Â Âstruct perf_evlist *evlist __used)
>> +{
>> + Â Â Â return 0;
>> +}
>> +
>> Âstatic void print_hostname(struct perf_header *ph, int fd, FILE *fp)
>> Â{
>> Â Â Â Âchar *str = do_read_string(fd, ph);
>> @@ -1315,6 +1321,12 @@ static void print_cpuid(struct perf_header *ph, int
>> fd, FILE *fp)
>> Â Â Â Âfree(str);
>> Â}
>>
>> +static void print_branch_stack(struct perf_header *ph __used, int fd
>> __used,
>> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â ÂFILE *fp)
>> +{
>> + Â Â Â fprintf(fp, "# contains samples with branch stacks\n");
>> +}
>> +
>> Âstatic int __event_process_build_id(struct build_id_event *bev,
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âchar *filename,
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âstruct perf_session *session)
>> @@ -1519,6 +1531,7 @@ static const struct feature_ops
>> feat_ops[HEADER_LAST_FEATURE] = {
>> Â Â Â ÂFEAT_OPA(HEADER_CMDLINE, Â Â Â Âcmdline),
>> Â Â Â ÂFEAT_OPF(HEADER_CPU_TOPOLOGY, Â cpu_topology),
>> Â Â Â ÂFEAT_OPF(HEADER_NUMA_TOPOLOGY, Ânuma_topology),
>> + Â Â Â FEAT_OPA(HEADER_BRANCH_STACK, Â branch_stack),
>> Â};
>>
>> Âstruct header_print_data {
>> diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
>> index e68f617..21a6be0 100644
>> --- a/tools/perf/util/header.h
>> +++ b/tools/perf/util/header.h
>> @@ -27,7 +27,7 @@ enum {
>> Â Â Â ÂHEADER_EVENT_DESC,
>> Â Â Â ÂHEADER_CPU_TOPOLOGY,
>> Â Â Â ÂHEADER_NUMA_TOPOLOGY,
>> -
>> + Â Â Â HEADER_BRANCH_STACK,
>> Â Â Â ÂHEADER_LAST_FEATURE,
>> Â Â Â ÂHEADER_FEAT_BITS Â Â Â Â= 256,
>> Â};
>> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
>> index 2739ed1..69d50c0 100644
>> --- a/tools/perf/util/sort.c
>> +++ b/tools/perf/util/sort.c
>> @@ -8,7 +8,7 @@ const char   Âdefault_sort_order[] = "comm,dso,symbol";
>> Âconst char  Â*sort_order = default_sort_order;
>> Âint      sort__need_collapse = 0;
>> Âint      sort__has_parent = 0;
>> -bool      sort__branch_mode;
>> +bool      sort__branch_mode = -1; /* -1 = means not set */
>>
>> Âenum sort_type    Âsort__first_dimension;
>>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/