[PATCH RFC 10/10] perf,tools: Show STAT_READ in perf report

From: kan . liang
Date: Tue Aug 18 2015 - 12:42:31 EST


From: Kan Liang <kan.liang@xxxxxxxxx>

This patch parses the PERF_RECORD_STAT_READ event in perf report.
With the -D option, the event name, CPU id and value are dumped.
With the --stdio option, the value is shown in a new section
(Performance counter stats) between the header and the sample results.

Example 1:

# perf record -e 'cycles,uncore_imc_0/cas_count_read/N'
--stat-read-interval 10 -a sleep 5
[ perf record: Woken up 501 times to write data ]
[ perf record: Captured and wrote 0.376 MB perf.data (816 samples) ]
# perf report -D | tail
SAMPLE events: 816
MMAP2 events: 1787
FINISHED_ROUND events: 330
STAT_READ events: 1002
cycles stats:
TOTAL events: 816
SAMPLE events: 816
uncore_imc_0/cas_count_read/N stats:
TOTAL events: 1002
STAT_READ events: 1002

Example 2:
$ perf record -e '{cycles,instructions}:N' -a sleep 2
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.247 MB perf.data (209 samples) ]

$ perf report --stdio --group --socket
# To display the perf.data header info, please use
--header/--header-only options.
#
# Samples: 209 of event 'anon group { cycles, instructions }'
# Event count (approx.): 41787924
#
# Socket: 0
#
# Performance counter stats:
# instructions 2745005
#
# Overhead Command Shared Object Symbol
# ................ ............ .................
.................................
#
8.06% 0.00% swapper [kernel.vmlinux] [k] ixgbe_read_reg
2.03% 0.00% sleep [kernel.vmlinux] [k]
__rb_insert_augmented
1.98% 0.00% swapper [kernel.vmlinux] [k]
run_timer_softirq

# Socket: 1
#
# Performance counter stats:
# instructions 6386942
#
# Overhead Command Shared Object Symbol
# ................ ............ .................
.................................
#
27.09% 0.00% kworker/23:2 [kernel.vmlinux] [k] delay_tsc
17.53% 0.00% perf [kernel.vmlinux] [k]
smp_call_function_single
13.62% 0.00% kworker/23:2 [kernel.vmlinux] [k] ixgbe_read_reg

Signed-off-by: Kan Liang <kan.liang@xxxxxxxxx>
---
tools/perf/builtin-report.c | 101 +++++++++++++++++++++++++++++++++++++++++---
tools/perf/util/cpumap.c | 4 +-
tools/perf/util/cpumap.h | 1 +
3 files changed, 97 insertions(+), 9 deletions(-)

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 6fdf9f4..4794a83 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -182,6 +182,34 @@ out_put:
return ret;
}

+static int process_stat_read_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_session *session)
+{
+ struct perf_evlist *evlist = session->evlist;
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+ int id = event->stat_read.pos_id;
+
+ while (id--) {
+ BUG_ON(evsel == NULL);
+ evsel = list_next_entry(evsel, node);
+ }
+
+ if ((evsel->counts == NULL) &&
+ (perf_evsel__alloc_counts(evsel, max_cpu_num, 1) < 0))
+ return -ENOMEM;
+
+ perf_counts(evsel->counts, event->stat_read.cpu, 0)->val += event->stat_read.value;
+ hists__inc_nr_events(evsel__hists(evsel), event->header.type);
+
+ dump_printf(": %s CPU %d: value %" PRIu64 " time: %" PRIu64 "\n",
+ evsel ? perf_evsel__name(evsel) : "FAIL",
+ event->stat_read.cpu,
+ event->stat_read.value,
+ event->stat_read.time);
+ return 0;
+}
+
static int process_read_event(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
@@ -329,6 +357,47 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report
return ret + fprintf(fp, "\n#\n");
}

+static inline void _stat_fprintf(FILE *fp, struct perf_evsel *evsel,
+ int socket, bool print_head)
+{
+ int i;
+ u64 stat_count = 0;
+
+ if (print_head)
+ fprintf(fp, "# Performance counter stats:\n");
+
+ for (i = 0; i < max_cpu_num; i++) {
+ if ((socket >= 0) && (cpu__get_socket(i) != socket))
+ continue;
+ stat_count += perf_counts(evsel->counts, i, 0)->val;
+ }
+
+ fprintf(fp, "# %s\t %" PRIu64, evsel->name, stat_count);
+}
+
+static void stat_fprintf(FILE *fp, struct perf_evsel *evsel, int socket)
+{
+ struct perf_evsel *leader = evsel->leader;
+ struct perf_evsel *pos;
+ bool print_head = true;
+
+ if (!symbol_conf.event_group &&
+ (evsel->counts == NULL))
+ return;
+
+ if (symbol_conf.event_group) {
+ for_each_group_member(pos, leader) {
+ if (pos->counts == NULL)
+ continue;
+ _stat_fprintf(fp, pos, socket, print_head);
+ print_head = false;
+ }
+ } else
+ _stat_fprintf(fp, evsel, socket, print_head);
+
+ fprintf(fp, "\n#\n");
+}
+
static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
struct report *rep,
const char *help)
@@ -344,17 +413,23 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
!perf_evsel__is_group_leader(pos))
continue;

- hists__fprintf_nr_sample_events(hists, rep, evname, stdout);
+ if (pos->counts == NULL)
+ hists__fprintf_nr_sample_events(hists, rep, evname, stdout);

if (symbol_conf.socket) {
int i;

for (i = 0; i < max_socket_num; i++) {
- fprintf(stdout, "#\n# Socket: %d\n#\n", i);
- hists__fprintf(hists, true, 0, 0, rep->min_percent, stdout, i);
+ fprintf(stdout, "\n# Socket: %d\n#\n", i);
+ stat_fprintf(stdout, pos, i);
+ if (pos->counts == NULL)
+ hists__fprintf(hists, true, 0, 0, rep->min_percent, stdout, i);
}
- } else
- hists__fprintf(hists, true, 0, 0, rep->min_percent, stdout, -1);
+ } else {
+ stat_fprintf(stdout, pos, -1);
+ if (pos->counts == NULL)
+ hists__fprintf(hists, true, 0, 0, rep->min_percent, stdout, -1);
+ }

fprintf(stdout, "\n\n");
}
@@ -611,6 +686,17 @@ parse_percent_limit(const struct option *opt, const char *str,
return 0;
}

+static void
+free_stat_counts(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each(evlist, evsel) {
+ if (evsel->counts != NULL)
+ perf_evsel__free_counts(evsel);
+ }
+}
+
int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
{
struct perf_session *session;
@@ -634,6 +720,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
.fork = perf_event__process_fork,
.lost = perf_event__process_lost,
.read = process_read_event,
+ .stat_read = process_stat_read_event,
.attr = perf_event__process_attr,
.tracing_data = perf_event__process_tracing_data,
.build_id = perf_event__process_build_id,
@@ -920,18 +1007,20 @@ repeat:
}

sort__setup_elide(stdout);
-
+ set_max_cpu_num();
if (symbol_conf.socket)
set_max_socket_num();

ret = __cmd_report(&report);
if (ret == K_SWITCH_INPUT_DATA) {
+ free_stat_counts(session->evlist);
perf_session__delete(session);
goto repeat;
} else
ret = 0;

error:
+ free_stat_counts(session->evlist);
perf_session__delete(session);
return ret;
}
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index ae03426..9ce0f2a 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -380,7 +380,7 @@ out:
}

/* Determine highest possible cpu in the system for sparse allocation */
-static void set_max_cpu_num(void)
+void set_max_cpu_num(void)
{
const char *mnt;
char path[PATH_MAX];
@@ -411,8 +411,6 @@ void set_max_socket_num(void)
{
int cpu, socket;

- set_max_cpu_num();
-
max_socket_num = 1;
for (cpu = 0; cpu < max_cpu_num; cpu++) {
socket = cpu__get_socket(cpu);
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index 094edd9..ef2ca6e 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -61,6 +61,7 @@ int *cpunode_map;

int cpu__setup_cpunode_map(void);
void set_max_socket_num(void);
+void set_max_cpu_num(void);

static inline int cpu__max_node(void)
{
--
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/