[PATCH v5 3/7] perf/sdt: Directly record SDT events with 'perf record'

From: Ravi Bangoria
Date: Tue Mar 14 2017 - 11:08:30 EST


From: Hemant Kumar <hemant@xxxxxxxxxxxxxxxxxx>

Add basic support for directly recording SDT events which are present
in the probe cache. Without this patch, recording an SDT event takes two
steps: create the probe point with 'perf probe', then record it with
'perf record'. With this patch, SDT events can be recorded directly
using 'perf record'.

After invoking 'perf record', behind the scenes, it checks whether the
event specified is an SDT event using the string 'sdt_' or flag '%'.
After that, it does a lookup of the probe cache to find the SDT
event. If it's not present, it throws an error. Otherwise, it goes on
and writes the event into the uprobe_events file and starts recording.
It also maintains a list of the event names that were written to
uprobe_events file. At the end of the record session, it removes the
events from the uprobe_events file using the maintained name list.

For example:

$ sudo ./perf list sdt
sdt_libpthread:mutex_entry [SDT event]
sdt_libc:setjmp [SDT event]

$ sudo ./perf record -a -e sdt_libc:setjmp

$ sudo ./perf script
bash 793 [002] 260.382957: sdt_libc:setjmp: (7ff85b6596a1)
reset 1296 [000] 260.511983: sdt_libc:setjmp: (7f26862e06a1)

Recording on SDT events with same provider and marker names is also
supported:

$ readelf -n /usr/lib64/libpthread-2.24.so | grep -A2 Provider
Provider: libpthread
Name: mutex_entry
Location: 0x0000000000009ddb, Base: 0x00000000000139cc, ...
--
Provider: libpthread
Name: mutex_entry
Location: 0x000000000000bcbb, Base: 0x00000000000139cc, ...

$ sudo ./perf record -a -e sdt_libpthread:mutex_entry
Warning : Recording on 2 occurences of sdt_libpthread:mutex_entry

$ sudo ./perf evlist
sdt_libpthread:mutex_entry_1
sdt_libpthread:mutex_entry

Note that it always fetches SDT events from the probe cache and ignores
the entries in the uprobe_events file. Hence, it creates new probe points
for an event even if it already exists in uprobe_events.

$ sudo ./perf probe sdt_libpthread:mutex_entry
Added new events:
sdt_libpthread:mutex_entry (on %mutex_entry in /usr/lib64/libpthread-2.24.so)
sdt_libpthread:mutex_entry_1 (on %mutex_entry in /usr/lib64/libpthread-2.24.so)

$ sudo ./perf record -a -e sdt_libpthread:mutex_entry
Warning : Recording on 2 occurences of sdt_libpthread:mutex_entry

$ sudo ./perf evlist
sdt_libpthread:mutex_entry_3
sdt_libpthread:mutex_entry_2

As it does not look at the uprobe_events file, it can't record events
whose probe points were created under a different name. For example:

$ sudo ./perf record -a -e sdt_libpthread:mutex_entry_1
Error: sdt_libpthread:mutex_entry_1 not found in the cache
invalid or unsupported event: 'sdt_libpthread:mutex_entry_1'

Signed-off-by: Hemant Kumar <hemant@xxxxxxxxxxxxxxxxxx>
Signed-off-by: Ravi Bangoria <ravi.bangoria@xxxxxxxxxxxxxxxxxx>
---
tools/perf/builtin-record.c | 27 ++++++-
tools/perf/util/parse-events.c | 60 +++++++++++++++
tools/perf/util/parse-events.h | 3 +
tools/perf/util/probe-event.c | 165 ++++++++++++++++++++++++++++++++++++++++-
tools/perf/util/probe-event.h | 12 +++
5 files changed, 261 insertions(+), 6 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index bc84a37..e8e1f73 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -73,6 +73,7 @@ struct record {
bool timestamp_filename;
struct switch_output switch_output;
unsigned long long samples;
+ struct list_head sdt_event_list;
};

static volatile int auxtrace_record__snapshot_started;
@@ -1503,6 +1504,23 @@ static struct record record = {
},
};

+/*
+ * Callback for the -e/--event option. Event strings that is_sdt_event()
+ * recognises are handed to parse_sdt_events_option(); everything else
+ * goes through the regular parse_events_option().
+ */
+static int record__parse_events_option(const struct option *opt,
+				       const char *str,
+				       int unset)
+{
+	if (!is_sdt_event((char *) str))
+		return parse_events_option(opt, str, unset);
+
+	return parse_sdt_events_option(opt, str, unset);
+}
+
+/*
+ * Pass record.sdt_event_list to remove_sdt_event_list() so the SDT
+ * events tracked for this session are cleaned up. The body is compiled
+ * out (making this a no-op) when HAVE_LIBELF_SUPPORT is not defined.
+ */
+static void sdt_event_list__remove(void)
+{
+#ifdef HAVE_LIBELF_SUPPORT
+ remove_sdt_event_list(&record.sdt_event_list);
+#endif
+}
+
const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
"\n\t\t\t\tDefault: fp";

@@ -1516,9 +1534,10 @@ static bool dry_run;
* using pipes, etc.
*/
static struct option __record_options[] = {
- OPT_CALLBACK('e', "event", &record.evlist, "event",
- "event selector. use 'perf list' to list available events",
- parse_events_option),
+ OPT_CALLBACK_ARG('e', "event", &record.evlist,
+ &record.sdt_event_list, "event",
+ "event selector. use 'perf list' to list available events",
+ record__parse_events_option),
OPT_CALLBACK(0, "filter", &record.evlist, "filter",
"event filter", parse_filter),
OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
@@ -1671,6 +1690,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
if (rec->evlist == NULL)
return -ENOMEM;

+ INIT_LIST_HEAD(&rec->sdt_event_list);
err = perf_config(perf_record_config, rec);
if (err)
return err;
@@ -1841,6 +1861,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
perf_evlist__delete(rec->evlist);
symbol__exit();
auxtrace_record__free(rec->itr);
+ sdt_event_list__remove();
return err;
}

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 54355d3..252dac1 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1727,6 +1727,66 @@ static void parse_events_print_error(struct parse_events_error *err,

#undef MAX_WIDTH

+/* SDT event needs LIBELF support for creating a probe point */
+#ifdef HAVE_LIBELF_SUPPORT
+/*
+ * Option callback that records an SDT event directly.
+ *
+ * @opt:   option descriptor; opt->value points at the perf_evlist pointer
+ *         and opt->data at the list_head tracking the SDT events created
+ *         so far.
+ * @str:   event string, optionally prefixed with '%'.
+ * @unset: unused.
+ *
+ * The probe point(s) for the event are created through add_sdt_event(),
+ * each resulting "group:event" name is parsed into the evlist, and the
+ * names are then moved onto opt->data so that they can be removed again
+ * at the end of the session.
+ *
+ * Returns 0 on success and non-zero on failure. On failure every SDT
+ * event tracked on opt->data, including the ones created by this call,
+ * is handed to remove_sdt_event_list().
+ */
+int parse_sdt_events_option(const struct option *opt, const char *str,
+			    int unset __maybe_unused)
+{
+	struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
+	struct parse_events_error err = { .idx = 0, };
+	struct sdt_event_list *sdt_event;
+	struct list_head sdt_evlist;
+	char *ptr;
+	int ret;
+
+	if (str[0] == '%')
+		str++;
+
+	/* add_sdt_event() takes a writable string, so work on a copy. */
+	ptr = strdup(str);
+	if (ptr == NULL)
+		return -ENOMEM;
+
+	/*
+	 * The temporary list head lives on the stack: only the entries are
+	 * heap allocated, so there is no separate head to free afterwards.
+	 */
+	INIT_LIST_HEAD(&sdt_evlist);
+
+	/* On failure add_sdt_event() has already emptied sdt_evlist. */
+	ret = add_sdt_event(ptr, &sdt_evlist);
+	if (!ret) {
+		list_for_each_entry(sdt_event, &sdt_evlist, list) {
+			ret = parse_events(evlist, sdt_event->name, &err);
+			if (ret < 0)
+				break;
+		}
+	}
+
+	/*
+	 * Hand the new entries over to the record struct before looking at
+	 * the result, so that the error path below also removes the probe
+	 * points created by this call instead of leaking them.
+	 */
+	list_splice(&sdt_evlist, opt->data);
+
+	if (ret) {
+		remove_sdt_event_list(opt->data);
+		parse_events_print_error(&err, str);
+	}
+	free(ptr);
+	return ret;
+}
+#else
+/*
+ * Stub for builds without HAVE_LIBELF_SUPPORT: no probe point can be
+ * created there, so the option is rejected unconditionally with -1.
+ */
+int parse_sdt_events_option(const struct option *opt __maybe_unused,
+ const char *str __maybe_unused,
+ int unset __maybe_unused)
+{
+ return -1;
+}
+#endif /* HAVE_LIBELF_SUPPORT */
+
int parse_events_option(const struct option *opt, const char *str,
int unset __maybe_unused)
{
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 8c72b0f..8e29cb6 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -197,6 +197,9 @@ int is_valid_tracepoint(const char *event_string);
int valid_event_mount(const char *eventfs);
char *parse_events_formats_error_string(char *additional_terms);

+/*
+ * Option callback for SDT events: creates the probe point(s) for @str and
+ * adds the resulting events to the evlist referenced by @opt. Returns 0 on
+ * success, non-zero on failure (always -1 without HAVE_LIBELF_SUPPORT).
+ */
+int parse_sdt_events_option(const struct option *opt, const char *str,
+ int unset);
+
#ifdef HAVE_LIBELF_SUPPORT
/*
* If the probe point starts with '%',
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 2b1409f..f725953 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -49,6 +49,7 @@

#define MAX_CMDLEN 256
#define PERFPROBE_GROUP "probe"
+#define MAX_EVENT_LENGTH 512

bool probe_event_dry_run; /* Dry run flag */
struct probe_conf probe_conf;
@@ -1293,7 +1294,7 @@ int parse_line_range_desc(const char *arg, struct line_range *lr)
return err;
}

-static int parse_perf_probe_event_name(char **arg, struct perf_probe_event *pev)
+int parse_perf_probe_event_name(char **arg, struct perf_probe_event *pev)
{
char *ptr;

@@ -3125,8 +3126,8 @@ static int find_cached_events(struct perf_probe_event *pev,
}

/* Try to find probe_trace_event from all probe caches */
-static int find_cached_events_all(struct perf_probe_event *pev,
- struct probe_trace_event **tevs)
+int find_cached_events_all(struct perf_probe_event *pev,
+ struct probe_trace_event **tevs)
{
struct probe_trace_event *tmp_tevs = NULL;
struct strlist *bidlist;
@@ -3476,3 +3477,161 @@ int copy_to_probe_trace_arg(struct probe_trace_arg *tvar,
tvar->name = NULL;
return 0;
}
+
+/*
+ * Unlink every entry from @sdt_evlist and release both the entry and its
+ * name string. The list head itself is left in place (and empty).
+ */
+static void free_sdt_list(struct list_head *sdt_evlist)
+{
+	struct sdt_event_list *entry, *next;
+
+	/* The _safe iterator simply does nothing for an empty list. */
+	list_for_each_entry_safe(entry, next, sdt_evlist, list) {
+		list_del(&entry->list);
+		free(entry->name);
+		free(entry);
+	}
+}
+
+/*
+ * Delete the SDT events from uprobe_events file that
+ * were created initially.
+ */
+void remove_sdt_event_list(struct list_head *sdt_events)
+{
+	struct sdt_event_list *sdt_event;
+	struct strfilter *filter = NULL;
+	const char *err = NULL;
+
+	if (list_empty(sdt_events))
+		return;
+
+	/* Build one filter of the form "name1 | name2 | ..." for all events. */
+	list_for_each_entry(sdt_event, sdt_events, list) {
+		if (!filter) {
+			filter = strfilter__new(sdt_event->name, &err);
+			if (!filter) {
+				pr_warning("Failed to build a filter to remove SDT event %s\n",
+					   sdt_event->name);
+				goto free_list;
+			}
+		} else if (strfilter__or(filter, sdt_event->name, &err)) {
+			/* Best effort: keep going and remove whatever we can. */
+			pr_warning("Failed to schedule SDT event %s for removal\n",
+				   sdt_event->name);
+		}
+	}
+
+	del_perf_probe_events(filter);
+	/* The filter is only needed for the deletion above; do not leak it. */
+	strfilter__delete(filter);
+
+free_list:
+	/* The name list is released whether or not the deletion was issued. */
+	free_sdt_list(sdt_events);
+}
+
+/*
+ * Fill pev->tevs / pev->ntevs from the probe caches via
+ * find_cached_events_all().
+ *
+ * Returns the (negative) lookup result if the lookup failed, -EINVAL if
+ * no matching event was found, and 0 otherwise. More than one match is
+ * not an error; it only triggers a warning.
+ */
+static int get_sdt_events_from_cache(struct perf_probe_event *pev)
+{
+	pev->ntevs = find_cached_events_all(pev, &pev->tevs);
+
+	if (pev->ntevs < 0) {
+		pr_err("Error: Cache lookup failed (code: %d)\n", pev->ntevs);
+		return pev->ntevs;
+	}
+
+	if (pev->ntevs == 0) {
+		pr_err("Error: %s:%s not found in the cache\n",
+		       pev->group, pev->event);
+		return -EINVAL;
+	}
+
+	if (pev->ntevs > 1)
+		pr_warning("Warning : Recording on %d occurences of %s:%s\n",
+			   pev->ntevs, pev->group, pev->event);
+
+	return 0;
+}
+
+/*
+ * Allocate a list entry named "<group>:<event>" for @tev and add it to
+ * @sdt_evlist.
+ *
+ * Returns 0 on success or -ENOMEM if an allocation fails, in which case
+ * nothing is added to the list and nothing is leaked.
+ */
+static int add_event_to_sdt_evlist(struct probe_trace_event *tev,
+				   struct list_head *sdt_evlist)
+{
+	struct sdt_event_list *tmp;
+	size_t len;
+
+	tmp = zalloc(sizeof(*tmp));
+	if (!tmp)
+		return -ENOMEM;
+
+	/*
+	 * Size the buffer from the strings themselves ("group" + ':' +
+	 * "event" + NUL) so that the snprintf() bound below always matches
+	 * the allocation, however long the names are.
+	 */
+	len = strlen(tev->group) + strlen(tev->event) + 2;
+	tmp->name = zalloc(len);
+	if (!tmp->name) {
+		free(tmp);
+		return -ENOMEM;
+	}
+
+	INIT_LIST_HEAD(&tmp->list);
+	snprintf(tmp->name, len, "%s:%s", tev->group, tev->event);
+	list_add(&tmp->list, sdt_evlist);
+
+	return 0;
+}
+
+/*
+ * Add one list entry per trace event of @pev to @sdt_evlist, stopping at
+ * the first failure. Returns 0 on success or the negative error from
+ * add_event_to_sdt_evlist().
+ */
+static int add_events_to_sdt_evlist(struct perf_probe_event *pev,
+				    struct list_head *sdt_evlist)
+{
+	int idx;
+	int err = 0;
+
+	for (idx = 0; idx < pev->ntevs && err >= 0; idx++)
+		err = add_event_to_sdt_evlist(&pev->tevs[idx], sdt_evlist);
+
+	return err < 0 ? err : 0;
+}
+
+/*
+ * Find the SDT event from the cache and if found add it/them
+ * to the uprobe_events file.
+ *
+ * @event:      event string to look up; handed to
+ *              parse_perf_probe_event_name() to fill in the group and
+ *              event name of a temporary perf_probe_event.
+ * @sdt_evlist: list that receives one "group:event" entry per probe
+ *              point on success.
+ *
+ * Returns 0 on success and non-zero on failure. When the result is
+ * negative, any entries already placed on @sdt_evlist are freed again.
+ *
+ * Side effect: overwrites probe_conf.max_probes and probe_conf.force_add
+ * in the file-scope probe_conf and does not restore them.
+ */
+int add_sdt_event(char *event, struct list_head *sdt_evlist)
+{
+ struct perf_probe_event *pev;
+ int ret;
+
+ pev = zalloc(sizeof(*pev));
+ if (!pev)
+ return -ENOMEM;
+
+ pev->sdt = true;
+ pev->uprobes = true;
+
+ /*
+ * Parse event to find the group name and event name of
+ * the sdt event.
+ */
+ ret = parse_perf_probe_event_name(&event, pev);
+ if (ret) {
+ /*
+ * NOTE(review): 'event' is passed by address above, so the parser
+ * may have moved it; the message may not show the full original
+ * string -- confirm. Also confirm nothing hanging off pev needs
+ * freeing here besides pev itself.
+ */
+ pr_err("Error in parsing sdt event %s\n", event);
+ free(pev);
+ return ret;
+ }
+
+ probe_conf.max_probes = MAX_PROBES;
+ probe_conf.force_add = 1;
+
+ /* Fetch all matching events from cache. */
+ ret = get_sdt_events_from_cache(pev);
+ if (ret < 0)
+ goto free_pev;
+
+ /*
+ * Create probe point for all events by adding them in
+ * uprobe_events file
+ */
+ ret = apply_perf_probe_events(pev, 1);
+ if (ret) {
+ pr_err("Error in adding SDT event : %s\n", event);
+ goto free_pev;
+ }
+
+ /* Add events to sdt_evlist */
+ ret = add_events_to_sdt_evlist(pev, sdt_evlist);
+ if (ret < 0)
+ goto free_pev;
+
+ ret = 0;
+
+free_pev:
+ /*
+ * NOTE(review): free_sdt_list() only releases the name entries. If
+ * add_events_to_sdt_evlist() failed after apply_perf_probe_events()
+ * succeeded, the probe points just created do not appear to be
+ * removed again -- confirm.
+ */
+ if (ret < 0)
+ free_sdt_list(sdt_evlist);
+ cleanup_perf_probe_events(pev, 1);
+ free(pev);
+ return ret;
+}
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index 5d4e940..6812230 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -113,6 +113,12 @@ struct variable_list {
struct strlist *vars; /* Available variables */
};

+/*
+ * List of sdt events.
+ *
+ * One node per probe point created for an SDT event; the nodes are used
+ * later to build the filter that removes those events again.
+ */
+struct sdt_event_list {
+ struct list_head list; /* linkage into the list of SDT events */
+ char *name; /* group:event, heap allocated and freed with the node */
+};
+
struct map;
int init_probe_symbol_maps(bool user_only);
void exit_probe_symbol_maps(void);
@@ -182,4 +188,10 @@ struct map *get_target_map(const char *target, bool user);
void arch__post_process_probe_trace_events(struct perf_probe_event *pev,
int ntevs);

+/*
+ * Helpers from probe-event.c that this change makes non-static.
+ * NOTE(review): within this patch they are only called from
+ * probe-event.c itself -- confirm an external user actually needs them.
+ */
+int parse_perf_probe_event_name(char **arg, struct perf_probe_event *pev);
+
+/* Try to find probe_trace_event from all probe caches */
+int find_cached_events_all(struct perf_probe_event *pev,
+ struct probe_trace_event **tevs);
+/*
+ * Create the probe point(s) for SDT event @event and append one
+ * "group:event" entry per probe point to @sdt_event_list. Returns 0 on
+ * success, non-zero on failure.
+ */
+int add_sdt_event(char *event, struct list_head *sdt_event_list);
+/*
+ * Delete the events named on @sdt_event_list via del_perf_probe_events()
+ * and free the list entries.
+ */
+void remove_sdt_event_list(struct list_head *sdt_event_list);
#endif /*_PROBE_EVENT_H */
--
2.9.3