[PATCH 2/5] perf script: extend db-export api to include callchains for samples

From: Chris Phlipot
Date: Tue Apr 19 2016 - 04:58:08 EST


The current implementation of the Python database export API only
includes call path information when using some form of call/return
tracing, but is unable to do so when sampling.

The following API extensions allow exporting of data collected by
perf record when using --call-graph.

The additions to the Python API include the following:
- add call_path_id to sample_table to allow association of samples
with call paths

- add dso_id to call_path_table to more closely align the data with that
of a callchain_node

db-export and trace-event-python were both extended to accommodate the API
changes listed above, as sketched below.
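
For illustration, a minimal sketch of the script-side handlers with the new
trailing columns. The handler names (sample_table, call_path_table) and the
column positions match this patch; the dictionaries and everything else in
the sketch are only illustrative assumptions, not part of the change:

  samples = {}     # sample db_id -> call_path db_id
  call_paths = {}  # call_path db_id -> (parent_id, symbol_id, ip, dso_id)

  def sample_table(*x):
      # x[0] is the sample db_id; x[21] is the new call_path_id column
      # (0 when no call path was resolved for the sample)
      samples[x[0]] = x[21]

  def call_path_table(cp_id, parent_id, symbol_id, ip, dso_id):
      # dso_id is the new fifth column added by this patch
      call_paths[cp_id] = (parent_id, symbol_id, ip, dso_id)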

Thread-stack's functionality was expanded to allow storage and export of
callchains that result from individual samples.

- Introduced a new function (thread_stack__process_callchain) to
resolve call paths using the existing callchain resolution provided by
thread__resolve_callchain

- The existing call_path tree in call_return_processor is used for storing
the data from the resolved callchain.

- Call_return_processor was also extended to allow exporting full call
paths in addition to the existing individual call/return pairs, since
call/return pairs are not available when sampling (sketched below).
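
Building on the sketch above (same illustrative dictionaries), a script can
then rebuild a sample's full call chain by walking the parent links of its
exported call path, which is what the per-sample call_path_id makes possible
when no call/return pairs exist:

  def sample_call_chain(sample_id):
      # Walk from the sample's (leaf) call path up to the root,
      # collecting (symbol_id, ip, dso_id) frames from callee to caller.
      # A call_path_id of 0 means no call path was exported.
      chain = []
      cp_id = samples.get(sample_id, 0)
      while cp_id:
          parent_id, symbol_id, ip, dso_id = call_paths[cp_id]
          chain.append((symbol_id, ip, dso_id))
          cp_id = parent_id
      return chain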

The code was tested using call graphs from fp and dwarf.
export-to-postgresql was used with Intel PT data to verify that the changes
did not negatively affect the existing behavior of the db-export API.

Signed-off-by: Chris Phlipot <cphlipot0@xxxxxxxxx>
---
tools/perf/util/db-export.c | 21 ++-
tools/perf/util/db-export.h | 2 +
.../util/scripting-engines/trace-event-python.c | 20 ++-
tools/perf/util/thread-stack.c | 162 +++++++++++++++++----
tools/perf/util/thread-stack.h | 14 +-
5 files changed, 184 insertions(+), 35 deletions(-)

diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c
index 049438d..69c9a9d 100644
--- a/tools/perf/util/db-export.c
+++ b/tools/perf/util/db-export.c
@@ -329,6 +329,13 @@ int db_export__sample(struct db_export *dbe, union perf_event *event,
if (err)
goto out_put;

+ dbe->call_path_last_seen_db_id = 0;
+ if (dbe->crp) {
+ thread_stack__process_callchain(thread, comm, evsel,
+ al->machine, sample,
+ PERF_MAX_STACK_DEPTH, dbe->crp);
+ }
+
if ((evsel->attr.sample_type & PERF_SAMPLE_ADDR) &&
sample_addr_correlates_sym(&evsel->attr)) {
struct addr_location addr_al;
@@ -346,6 +353,7 @@ int db_export__sample(struct db_export *dbe, union perf_event *event,
goto out_put;
}
}
+ es.call_path_db_id = dbe->call_path_last_seen_db_id;

if (dbe->export_sample)
err = dbe->export_sample(dbe, &es);
@@ -397,9 +405,10 @@ int db_export__branch_types(struct db_export *dbe)
int db_export__call_path(struct db_export *dbe, struct call_path *cp)
{
int err;
-
- if (cp->db_id)
+ if (cp->db_id) {
+ dbe->call_path_last_seen_db_id = cp->db_id;
return 0;
+ }

if (cp->parent) {
err = db_export__call_path(dbe, cp->parent);
@@ -409,8 +418,14 @@ int db_export__call_path(struct db_export *dbe, struct call_path *cp)

cp->db_id = ++dbe->call_path_last_db_id;

- if (dbe->export_call_path)
+ if (dbe->export_call_path) {
+ if (cp->dso)
+ db_export__dso(dbe, cp->dso, cp->machine);
+ if (cp->sym && cp->dso)
+ db_export__symbol(dbe, cp->sym, cp->dso);
+ dbe->call_path_last_seen_db_id = cp->db_id;
return dbe->export_call_path(dbe, cp);
+ }

return 0;
}
diff --git a/tools/perf/util/db-export.h b/tools/perf/util/db-export.h
index 25e22fd..40e3b07 100644
--- a/tools/perf/util/db-export.h
+++ b/tools/perf/util/db-export.h
@@ -43,6 +43,7 @@ struct export_sample {
u64 addr_dso_db_id;
u64 addr_sym_db_id;
u64 addr_offset; /* addr offset from symbol start */
+ u64 call_path_db_id;
};

struct db_export {
@@ -73,6 +74,7 @@ struct db_export {
u64 symbol_last_db_id;
u64 sample_last_db_id;
u64 call_path_last_db_id;
+ u64 call_path_last_seen_db_id; /* last db_id seen (exported or not) */
u64 call_return_last_db_id;
struct list_head deferred;
};
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 525eb49..ca3f9c6 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -681,7 +681,7 @@ static int python_export_sample(struct db_export *dbe,
struct tables *tables = container_of(dbe, struct tables, dbe);
PyObject *t;

- t = tuple_new(21);
+ t = tuple_new(22);

tuple_set_u64(t, 0, es->db_id);
tuple_set_u64(t, 1, es->evsel->db_id);
@@ -704,6 +704,8 @@ static int python_export_sample(struct db_export *dbe,
tuple_set_u64(t, 18, es->sample->data_src);
tuple_set_s32(t, 19, es->sample->flags & PERF_BRANCH_MASK);
tuple_set_s32(t, 20, !!(es->sample->flags & PERF_IP_FLAG_IN_TX));
+ tuple_set_u64(t, 21, es->call_path_db_id);
+

call_object(tables->sample_handler, t, "sample_table");

@@ -716,17 +718,19 @@ static int python_export_call_path(struct db_export *dbe, struct call_path *cp)
{
struct tables *tables = container_of(dbe, struct tables, dbe);
PyObject *t;
- u64 parent_db_id, sym_db_id;
+ u64 parent_db_id, sym_db_id, dso_db_id;

parent_db_id = cp->parent ? cp->parent->db_id : 0;
sym_db_id = cp->sym ? *(u64 *)symbol__priv(cp->sym) : 0;
+ dso_db_id = cp->dso ? cp->dso->db_id : 0;

- t = tuple_new(4);
+ t = tuple_new(5);

tuple_set_u64(t, 0, cp->db_id);
tuple_set_u64(t, 1, parent_db_id);
tuple_set_u64(t, 2, sym_db_id);
tuple_set_u64(t, 3, cp->ip);
+ tuple_set_u64(t, 4, dso_db_id);

call_object(tables->call_path_handler, t, "call_path_table");

@@ -763,6 +767,13 @@ static int python_export_call_return(struct db_export *dbe,
return 0;
}

+static int python_process_call_path(struct call_path *cp, void *data)
+{
+ struct db_export *dbe = data;
+
+ return db_export__call_path(dbe, cp);
+}
+
static int python_process_call_return(struct call_return *cr, void *data)
{
struct db_export *dbe = data;
@@ -1027,7 +1038,8 @@ static void set_table_handlers(struct tables *tables)

if (export_calls) {
tables->dbe.crp =
- call_return_processor__new(python_process_call_return,
+ call_return_processor__new(python_process_call_path,
+ python_process_call_return,
&tables->dbe);
if (!tables->dbe.crp)
Py_FatalError("failed to create calls processor");
diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c
index 679688e..38a749d 100644
--- a/tools/perf/util/thread-stack.c
+++ b/tools/perf/util/thread-stack.c
@@ -22,6 +22,7 @@
#include "debug.h"
#include "symbol.h"
#include "comm.h"
+#include "callchain.h"
#include "thread-stack.h"

#define CALL_PATH_BLOCK_SHIFT 8
@@ -56,7 +57,8 @@ struct call_path_root {
*/
struct call_return_processor {
struct call_path_root *cpr;
- int (*process)(struct call_return *cr, void *data);
+ int (*process_call_path)(struct call_path *cp, void *data);
+ int (*process_call_return)(struct call_return *cr, void *data);
void *data;
};

@@ -216,7 +218,7 @@ static int thread_stack__call_return(struct thread *thread,
if (no_return)
cr.flags |= CALL_RETURN_NO_RETURN;

- return crp->process(&cr, crp->data);
+ return crp->process_call_return(&cr, crp->data);
}

static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
@@ -336,9 +338,12 @@ void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
}

static void call_path__init(struct call_path *cp, struct call_path *parent,
+ struct machine *machine, struct dso *dso,
struct symbol *sym, u64 ip, bool in_kernel)
{
cp->parent = parent;
+ cp->machine = machine;
+ cp->dso = dso;
cp->sym = sym;
cp->ip = sym ? 0 : ip;
cp->db_id = 0;
@@ -354,7 +359,7 @@ static struct call_path_root *call_path_root__new(void)
cpr = zalloc(sizeof(struct call_path_root));
if (!cpr)
return NULL;
- call_path__init(&cpr->call_path, NULL, NULL, 0, false);
+ call_path__init(&cpr->call_path, NULL, NULL, NULL, NULL, 0, false);
INIT_LIST_HEAD(&cpr->blocks);
return cpr;
}
@@ -372,8 +377,9 @@ static void call_path_root__free(struct call_path_root *cpr)

static struct call_path *call_path__new(struct call_path_root *cpr,
struct call_path *parent,
- struct symbol *sym, u64 ip,
- bool in_kernel)
+ struct machine *machine,
+ struct dso *dso, struct symbol *sym,
+ u64 ip, bool in_kernel)
{
struct call_path_block *cpb;
struct call_path *cp;
@@ -393,14 +399,16 @@ static struct call_path *call_path__new(struct call_path_root *cpr,
n = cpr->next++ & CALL_PATH_BLOCK_MASK;
cp = &cpb->cp[n];

- call_path__init(cp, parent, sym, ip, in_kernel);
+ call_path__init(cp, parent, machine, dso, sym, ip, in_kernel);

return cp;
}

static struct call_path *call_path__findnew(struct call_path_root *cpr,
struct call_path *parent,
- struct symbol *sym, u64 ip, u64 ks)
+ struct machine *machine,
+ struct dso *dso, struct symbol *sym,
+ u64 ip, u64 ks)
{
struct rb_node **p;
struct rb_node *node_parent = NULL;
@@ -411,23 +419,28 @@ static struct call_path *call_path__findnew(struct call_path_root *cpr,
ip = 0;

if (!parent)
- return call_path__new(cpr, parent, sym, ip, in_kernel);
+ return call_path__new(cpr, parent, machine, dso, sym, ip,
+ in_kernel);

p = &parent->children.rb_node;
while (*p != NULL) {
node_parent = *p;
cp = rb_entry(node_parent, struct call_path, rb_node);

- if (cp->sym == sym && cp->ip == ip)
+ if (cp->sym == sym && cp->ip == ip && cp->dso == dso)
return cp;

- if (sym < cp->sym || (sym == cp->sym && ip < cp->ip))
+ if (sym < cp->sym || (sym == cp->sym && ip < cp->ip) ||
+ (sym == cp->sym && ip == cp->ip &&
+ dso < cp->dso) ||
+ (sym == cp->sym && ip == cp->ip &&
+ dso == cp->dso && machine < cp->machine))
p = &(*p)->rb_left;
else
p = &(*p)->rb_right;
}

- cp = call_path__new(cpr, parent, sym, ip, in_kernel);
+ cp = call_path__new(cpr, parent, machine, dso, sym, ip, in_kernel);
if (!cp)
return NULL;

@@ -438,7 +451,10 @@ static struct call_path *call_path__findnew(struct call_path_root *cpr,
}

struct call_return_processor *
-call_return_processor__new(int (*process)(struct call_return *cr, void *data),
+call_return_processor__new(int (*process_call_path)(struct call_path *cp,
+ void *data),
+ int (*process_call_return)(struct call_return *cr,
+ void *data),
void *data)
{
struct call_return_processor *crp;
@@ -449,7 +465,8 @@ call_return_processor__new(int (*process)(struct call_return *cr, void *data),
crp->cpr = call_path_root__new();
if (!crp->cpr)
goto out_free;
- crp->process = process;
+ crp->process_call_path = process_call_path;
+ crp->process_call_return = process_call_return;
crp->data = data;
return crp;

@@ -492,7 +509,7 @@ static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,

static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
u64 ret_addr, u64 timestamp, u64 ref,
- struct symbol *sym)
+ struct dso *dso, struct symbol *sym)
{
int err;

@@ -502,7 +519,7 @@ static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
if (ts->cnt == 1) {
struct thread_stack_entry *tse = &ts->stack[0];

- if (tse->cp->sym == sym)
+ if (tse->cp->dso == dso && tse->cp->sym == sym)
return thread_stack__call_return(thread, ts, --ts->cnt,
timestamp, ref, false);
}
@@ -540,20 +557,28 @@ static int thread_stack__bottom(struct thread *thread, struct thread_stack *ts,
{
struct call_path_root *cpr = ts->crp->cpr;
struct call_path *cp;
+ struct machine *machine;
+ struct dso *dso = NULL;
struct symbol *sym;
u64 ip;

if (sample->ip) {
ip = sample->ip;
sym = from_al->sym;
+ if (from_al->map)
+ dso = from_al->map->dso;
+ machine = from_al->machine;
} else if (sample->addr) {
ip = sample->addr;
sym = to_al->sym;
+ if (to_al->map)
+ dso = to_al->map->dso;
+ machine = to_al->machine;
} else {
return 0;
}

- cp = call_path__findnew(cpr, &cpr->call_path, sym, ip,
+ cp = call_path__findnew(cpr, &cpr->call_path, machine, dso, sym, ip,
ts->kernel_start);
if (!cp)
return -ENOMEM;
@@ -586,6 +611,7 @@ static int thread_stack__no_call_return(struct thread *thread,
/* If the stack is empty, push the userspace address */
if (!ts->cnt) {
cp = call_path__findnew(cpr, &cpr->call_path,
+ to_al->machine, to_al->map->dso,
to_al->sym, sample->addr,
ts->kernel_start);
if (!cp)
@@ -610,7 +636,8 @@ static int thread_stack__no_call_return(struct thread *thread,
parent = &cpr->call_path;

/* This 'return' had no 'call', so push and pop top of stack */
- cp = call_path__findnew(cpr, parent, from_al->sym, sample->ip,
+ cp = call_path__findnew(cpr, parent, from_al->machine,
+ from_al->map->dso, from_al->sym, sample->ip,
ts->kernel_start);
if (!cp)
return -ENOMEM;
@@ -621,7 +648,7 @@ static int thread_stack__no_call_return(struct thread *thread,
return err;

return thread_stack__pop_cp(thread, ts, sample->addr, sample->time, ref,
- to_al->sym);
+ to_al->map->dso, to_al->sym);
}

static int thread_stack__trace_begin(struct thread *thread,
@@ -636,7 +663,7 @@ static int thread_stack__trace_begin(struct thread *thread,

/* Pop trace end */
tse = &ts->stack[ts->cnt - 1];
- if (tse->cp->sym == NULL && tse->cp->ip == 0) {
+ if (tse->cp->dso == NULL && tse->cp->sym == NULL && tse->cp->ip == 0) {
err = thread_stack__call_return(thread, ts, --ts->cnt,
timestamp, ref, false);
if (err)
@@ -657,7 +684,7 @@ static int thread_stack__trace_end(struct thread_stack *ts,
if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref))
return 0;

- cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0,
+ cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, NULL, NULL, 0,
ts->kernel_start);
if (!cp)
return -ENOMEM;
@@ -668,14 +695,11 @@ static int thread_stack__trace_end(struct thread_stack *ts,
false);
}

-int thread_stack__process(struct thread *thread, struct comm *comm,
- struct perf_sample *sample,
- struct addr_location *from_al,
- struct addr_location *to_al, u64 ref,
- struct call_return_processor *crp)
+static int __thread_stack__process_init(struct thread *thread,
+ struct comm *comm,
+ struct call_return_processor *crp)
{
struct thread_stack *ts = thread->ts;
- int err = 0;

if (ts) {
if (!ts->crp) {
@@ -694,6 +718,80 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
ts = thread->ts;
ts->comm = comm;
}
+ return 0;
+}
+
+int thread_stack__process_callchain(struct thread *thread, struct comm *comm,
+ struct perf_evsel *evsel,
+ struct machine *machine,
+ struct perf_sample *sample, int max_stack,
+ struct call_return_processor *crp)
+{
+ struct call_path *current = &crp->cpr->call_path;
+ struct thread_stack *ts = NULL;
+ enum chain_order saved_order = callchain_param.order;
+ int err = 0;
+
+ if (!symbol_conf.use_callchain || !sample->callchain)
+ return err;
+
+ err = __thread_stack__process_init(thread, comm, crp);
+ if (err)
+ return err;
+
+ ts = thread->ts;
+
+
+ callchain_param.order = ORDER_CALLER;
+ err = thread__resolve_callchain(thread, &callchain_cursor, evsel,
+ sample, NULL, NULL, max_stack);
+ if (err) {
+ callchain_param.order = saved_order;
+ return err;
+ }
+ callchain_cursor_commit(&callchain_cursor);
+
+ while (1) {
+ struct callchain_cursor_node *node;
+ struct dso *dso = NULL;
+ node = callchain_cursor_current(&callchain_cursor);
+ if (!node)
+ break;
+ if (node->map)
+ dso = node->map->dso;
+
+ current = call_path__findnew(ts->crp->cpr, current, machine,
+ dso, node->sym, node->ip,
+ ts->kernel_start);
+
+ callchain_cursor_advance(&callchain_cursor);
+ }
+ callchain_param.order = saved_order;
+
+ if (current == &crp->cpr->call_path) {
+ /* Bail because the callchain was empty. */
+ return 1;
+ }
+
+ err = ts->crp->process_call_path(current, ts->crp->data);
+ return err;
+}
+
+int thread_stack__process(struct thread *thread, struct comm *comm,
+ struct perf_sample *sample,
+ struct addr_location *from_al,
+ struct addr_location *to_al, u64 ref,
+ struct call_return_processor *crp)
+{
+ struct thread_stack *ts = NULL;
+
+ int err = 0;
+
+ err = __thread_stack__process_init(thread, comm, crp);
+ if (err)
+ return err;
+
+ ts = thread->ts;

/* Flush stack on exec */
if (ts->comm != comm && thread->pid_ == thread->tid) {
@@ -717,8 +815,12 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
if (sample->flags & PERF_IP_FLAG_CALL) {
struct call_path_root *cpr = ts->crp->cpr;
struct call_path *cp;
+ struct dso *dso = NULL;
u64 ret_addr;

+ if (to_al->map)
+ dso = to_al->map->dso;
+
if (!sample->ip || !sample->addr)
return 0;

@@ -727,6 +829,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
return 0; /* Zero-length calls are excluded */

cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
+ to_al->machine, dso,
to_al->sym, sample->addr,
ts->kernel_start);
if (!cp)
@@ -734,11 +837,16 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
cp, false);
} else if (sample->flags & PERF_IP_FLAG_RETURN) {
+ struct dso *dso = NULL;
+ if (from_al->map)
+ dso = from_al->map->dso;
+
if (!sample->ip || !sample->addr)
return 0;

err = thread_stack__pop_cp(thread, ts, sample->addr,
- sample->time, ref, from_al->sym);
+ sample->time, ref, dso,
+ from_al->sym);
if (err) {
if (err < 0)
return err;
diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h
index e1528f1..7b9615e 100644
--- a/tools/perf/util/thread-stack.h
+++ b/tools/perf/util/thread-stack.h
@@ -26,6 +26,7 @@ struct comm;
struct ip_callchain;
struct symbol;
struct dso;
+struct machine;
struct call_return_processor;
struct comm;
struct perf_sample;
@@ -83,6 +84,8 @@ struct call_return {
*/
struct call_path {
struct call_path *parent;
+ struct machine *machine;
+ struct dso *dso;
struct symbol *sym;
u64 ip;
u64 db_id;
@@ -100,9 +103,18 @@ int thread_stack__flush(struct thread *thread);
void thread_stack__free(struct thread *thread);

struct call_return_processor *
-call_return_processor__new(int (*process)(struct call_return *cr, void *data),
+call_return_processor__new(int (*process_call_path)(struct call_path *cp,
+ void *data),
+ int (*process_call_return)(struct call_return *cr,
+ void *data),
void *data);
void call_return_processor__free(struct call_return_processor *crp);
+
+int thread_stack__process_callchain(struct thread *thread, struct comm *comm,
+ struct perf_evsel *evsel,
+ struct machine *machine,
+ struct perf_sample *sample, int max_stack,
+ struct call_return_processor *crp);
int thread_stack__process(struct thread *thread, struct comm *comm,
struct perf_sample *sample,
struct addr_location *from_al,
--
2.7.4