[RFC 2/2] perf: Marker software event and ioctl

From: Pawel Moll
Date: Fri Sep 12 2014 - 07:58:41 EST


This patch adds a PERF_COUNT_SW_MARKER event type, which
can be requested by the user, and a PERF_EVENT_IOC_MARKER
ioctl command which will inject an event of said type into
the perf buffer. The ioctl can take a zero-terminated
string argument, similar to trace_marker in ftrace,
which will be kept in the "raw" field of the sample.

The main use case for this is synchronisation of
performance data generated in user space with the perf
stream coming from the kernel. For example, the marker
can be inserted by a JIT engine after it has generated
a portion of the code, but before the code is executed
for the first time, allowing the post-processor to
pick the correct debugging information. Another example
is a system profiling tool taking data from sources
other than just perf, which generates a marker
at the beginning and at the end of the session
(and possibly also periodically during the session) to
synchronise kernel timestamps with clock values
obtained in userspace (gtod or raw_monotonic).

Signed-off-by: Pawel Moll <pawel.moll@xxxxxxx>
---
include/uapi/linux/perf_event.h | 2 ++
kernel/events/core.c | 43 +++++++++++++++++++++++++++++++++++++++++
2 files changed, 45 insertions(+)

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index e5a75c5..83b0f5b 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -110,6 +110,7 @@ enum perf_sw_ids {
PERF_COUNT_SW_ALIGNMENT_FAULTS = 7,
PERF_COUNT_SW_EMULATION_FAULTS = 8,
PERF_COUNT_SW_DUMMY = 9,
+ PERF_COUNT_SW_MARKER = 10,

PERF_COUNT_SW_MAX, /* non-ABI */
};
@@ -350,6 +351,7 @@ struct perf_event_attr {
#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5)
#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *)
#define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *)
+#define PERF_EVENT_IOC_MARKER _IOW('$', 8, char *) /* string: user -> kernel */

enum perf_event_ioc_flags {
PERF_IOC_FLAG_GROUP = 1U << 0,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index df093e3..dbce284 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3655,6 +3655,7 @@ static inline int perf_fget_light(int fd, struct fd *p)
static int perf_event_set_output(struct perf_event *event,
struct perf_event *output_event);
static int perf_event_set_filter(struct perf_event *event, void __user *arg);
+static int perf_sw_event_marker(struct perf_event *event, char __user *arg);

static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
@@ -3709,6 +3710,9 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case PERF_EVENT_IOC_SET_FILTER:
return perf_event_set_filter(event, (void __user *)arg);

+ case PERF_EVENT_IOC_MARKER:
+ return perf_sw_event_marker(event, (char __user *)arg);
+
default:
return -ENOTTY;
}
@@ -3728,6 +3732,7 @@ static long perf_compat_ioctl(struct file *file, unsigned int cmd,
switch (_IOC_NR(cmd)) {
case _IOC_NR(PERF_EVENT_IOC_SET_FILTER):
case _IOC_NR(PERF_EVENT_IOC_ID):
+ case _IOC_NR(PERF_EVENT_IOC_MARKER):
/* Fix up pointer size (usually 4 -> 8 in 32-on-64-bit case */
if (_IOC_SIZE(cmd) == sizeof(compat_uptr_t)) {
cmd &= ~IOCSIZE_MASK;
@@ -5960,6 +5965,44 @@ static struct pmu perf_swevent = {
.event_idx = perf_swevent_event_idx,
};

+/*
+ * PERF_EVENT_IOC_MARKER: emit a sample on @event; a NUL-terminated user string
+ * @arg (at most PAGE_SIZE bytes with the NUL), if given, becomes its raw data.
+ * Returns 0, or -EINVAL (not a marker event), -EFAULT, -E2BIG or -ENOMEM.
+ */
+static int perf_sw_event_marker(struct perf_event *event, char __user *arg)
+{
+	struct perf_sample_data data;
+	struct perf_raw_record raw = { 0, };
+	long len;
+
+	if (event->attr.type != PERF_TYPE_SOFTWARE ||
+	    event->attr.config != PERF_COUNT_SW_MARKER)
+		return -EINVAL;
+	perf_sample_data_init(&data, 0, 0);
+	if (arg) {
+		/* len counts the NUL; 0 means fault, > PAGE_SIZE means too long */
+		len = strnlen_user(arg, PAGE_SIZE);
+		if (len <= 0 || len > PAGE_SIZE)
+			return len > 0 ? -E2BIG : -EFAULT;
+		/* Pad so that the u32 size plus payload ends on a u64 boundary */
+		raw.size = ALIGN(len + sizeof(u32), sizeof(u64)) - sizeof(u32);
+		raw.data = kzalloc(raw.size, GFP_KERNEL);
+		if (!raw.data)
+			return -ENOMEM;
+		if (copy_from_user(raw.data, arg, len)) {
+			kfree(raw.data);
+			return -EFAULT;
+		}
+		/* User space may have changed the string since it was measured */
+		((char *)raw.data)[len - 1] = '\0';
+		data.raw = &raw;
+	}
+	perf_event_output(event, &data, current_pt_regs());
+	kfree(raw.data);	/* kfree(NULL) is a no-op */
+	return 0;
+}
+
#ifdef CONFIG_EVENT_TRACING

static int perf_tp_filter_match(struct perf_event *event,
--
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/