[PATCH 11/14] perf tools: record write data overhead

From: kan . liang
Date: Wed Nov 23 2016 - 12:45:59 EST


From: Kan Liang <kan.liang@xxxxxxxxx>

perf record writes all kinds of events to file, which brings overhead as
well. Introducing PERF_USER_WRITE_OVERHEAD to save the overhead
information.
The overhead is measured by the time taken by the write itself, not the
elapsed system time.
Caculate the total user write overhead on each CPU, and display them in
perf report

Signed-off-by: Kan Liang <kan.liang@xxxxxxxxx>
---
tools/perf/builtin-record.c | 61 +++++++++++++++++++++++++++++++++++++++++++--
tools/perf/util/event.h | 9 +++++++
tools/perf/util/machine.c | 5 ++++
tools/perf/util/session.c | 4 +++
4 files changed, 77 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 67d2a90..492058e 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -45,6 +45,11 @@
#include <asm/bug.h>
#include <linux/time64.h>

+struct write_overhead{
+ u64 nr;
+ u64 time;
+};
+
struct record {
struct perf_tool tool;
struct record_opts opts;
@@ -63,17 +68,40 @@ struct record {
bool timestamp_filename;
bool switch_output;
unsigned long long samples;
+ struct write_overhead overhead[MAX_NR_CPUS];
};

+static u64 get_vnsecs(void)
+{
+ struct timespec ts;
+
+ clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts);
+
+ return ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
+}
+
static int record__write(struct record *rec, void *bf, size_t size)
{
+ int cpu = sched_getcpu();
+ u64 start, end;
+ int ret = 0;
+
+ start = get_vnsecs();
if (perf_data_file__write(rec->session->file, bf, size) < 0) {
pr_err("failed to write perf data, error: %m\n");
- return -1;
+ ret = -1;
+ goto done;
}

rec->bytes_written += size;
- return 0;
+done:
+ end = get_vnsecs();
+ if (cpu >= 0) {
+ rec->overhead[cpu].nr++;
+ rec->overhead[cpu].time += (end - start);
+ }
+
+ return ret;
}

static int process_synthesized_event(struct perf_tool *tool,
@@ -813,6 +841,33 @@ static int record__synthesize(struct record *rec, bool tail)
return err;
}

+static void perf_event__synth_overhead(struct record *rec, perf_event__handler_t process)
+{
+ int cpu;
+
+ union perf_event event = {
+ .overhead = {
+ .header = {
+ .type = PERF_RECORD_USER_OVERHEAD,
+ .size = sizeof(struct perf_overhead),
+ },
+ },
+ };
+
+ event.overhead.type = PERF_USER_WRITE_OVERHEAD;
+
+ for (cpu = 0; cpu < MAX_NR_CPUS; cpu++) {
+ if (!rec->overhead[cpu].nr)
+ continue;
+
+ event.overhead.entry.cpu = cpu;
+ event.overhead.entry.nr = rec->overhead[cpu].nr;
+ event.overhead.entry.time = rec->overhead[cpu].time;
+
+ (void)process(&rec->tool, &event, NULL, NULL);
+ }
+}
+
static int __cmd_record(struct record *rec, int argc, const char **argv)
{
int err;
@@ -1074,6 +1129,8 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
goto out_child;
}

+ perf_event__synth_overhead(rec, process_synthesized_event);
+
if (!quiet)
fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 1ef1a9d..9927cf9 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -272,6 +272,9 @@ enum auxtrace_error_type {
* events on each CPU.
* The total number of accessing side-band events handler function is stored
* in [0], while the accumulated processing time is in [1].
+ * The total_user_write_overhead tells exactly the overhead to write data in
+ * perf record.
+ * The total write# is stored in [0], while the accumulated time is in [1].
*/
struct events_stats {
u64 total_period;
@@ -283,6 +286,7 @@ struct events_stats {
u64 total_nmi_overhead[MAX_NR_CPUS][2];
u64 total_mux_overhead[MAX_NR_CPUS][2];
u64 total_sb_overhead[MAX_NR_CPUS][2];
+ u64 total_user_write_overhead[MAX_NR_CPUS][2];
u32 nr_events[PERF_RECORD_HEADER_MAX];
u32 nr_non_filtered_samples;
u32 nr_lost_warned;
@@ -493,6 +497,11 @@ struct time_conv_event {
u64 time_zero;
};

+enum perf_user_overhead_event_type { /* above any possible kernel type */
+ PERF_USER_OVERHEAD_TYPE_START = 100,
+ PERF_USER_WRITE_OVERHEAD = 100,
+};
+
struct perf_overhead {
struct perf_event_header header;
u32 type;
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index d8cde21..ce7a0ea 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -573,6 +573,11 @@ int machine__process_overhead_event(struct machine *machine __maybe_unused,
event->overhead.entry.nr,
event->overhead.entry.time,
event->overhead.entry.cpu);
+ } else if (event->overhead.type == PERF_USER_WRITE_OVERHEAD) {
+ dump_printf(" User write nr: %llu time: %llu cpu %u\n",
+ event->overhead.entry.nr,
+ event->overhead.entry.time,
+ event->overhead.entry.cpu);
} else {
dump_printf("\tUNSUPPORT OVERHEAD TYPE 0x%x!\n", event->overhead.type);
}
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 27a5c8a..a72992b 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1227,6 +1227,10 @@ overhead_stats_update(struct perf_tool *tool,
evlist->stats.total_sb_overhead[event->overhead.entry.cpu][0] += event->overhead.entry.nr;
evlist->stats.total_sb_overhead[event->overhead.entry.cpu][1] += event->overhead.entry.time;
break;
+ case PERF_USER_WRITE_OVERHEAD:
+ evlist->stats.total_user_write_overhead[event->overhead.entry.cpu][0] += event->overhead.entry.nr;
+ evlist->stats.total_user_write_overhead[event->overhead.entry.cpu][1] += event->overhead.entry.time;
+ break;
default:
break;
}
--
2.5.5