[RFC] [PATCH] perf: Attaching an event to a specific PMU

From: Robert Richter
Date: Sun Jul 03 2011 - 11:13:09 EST


Peter,

this is a prototype implementation for attaching an event to a
specific PMU. If there is general acceptance of this approach, I
will create patches for upstream integration and base my current IBS
patches on it.

-Robert


This patch creates device nodes for each pmu using udev:

# ls -l /dev/pmu/
total 0
crw-rw---- 1 root root 254, 5 Jul 8 2011 breakpoint
crw-rw---- 1 root root 254, 4 Jul 8 2011 cpu
crw-rw---- 1 root root 254, 6 Jul 8 2011 proto
crw-rw---- 1 root root 254, 1 Jul 8 2011 software
crw-rw---- 1 root root 254, 2 Jul 8 2011 tracepoint

After opening a device, the pmu's file descriptor can be used to attach
an event to it. This works the same way as attaching an event to a
specific group, i.e. the descriptor is passed as the group_fd argument:

pmu = open("/dev/pmu/proto", O_RDONLY);
...
event = sys_perf_event_open(&attr, 0, -1, pmu, 0);

This patch includes a working example that attaches an event to the
PMU registered with the name 'proto':

# ls -l /dev/pmu/proto
crw-rw---- 1 root root 254, 6 Jul 8 2011 /dev/pmu/proto
# dmesg -c > /dev/null
# ./proto
# dmesg -c
Found event ffff88041de71c00 (config=0000000000f00ba2) for pmu proto (type=6) on cpu -1
Adding event ffff88041de71c00 (config=0000000000f00ba2) to pmu proto (type=6) on cpu 1
Removing event ffff88041de71c00 (config=0000000000f00ba2) to pmu proto (type=6) on cpu 1
Adding event ffff88041de71c00 (config=0000000000f00ba2) to pmu proto (type=6) on cpu 1
Removing event ffff88041de71c00 (config=0000000000f00ba2) to pmu proto (type=6) on cpu 1

Building the example:

$ cd linux # Linux kernel source dir
$ make -C tools/perf/Documentation/examples CFLAGS=-I../.. proto

This approach works for fixed pmu types and also for dynamically
allocated pmus.

I intend to use this event allocation method to implement AMD
IBS. Other pmus can be implemented similarly, such as northbridge and/or
uncore events for x86. The implementation is generic and not limited
to a single architecture; it is useful on every system with multiple
pmus.

Signed-off-by: Robert Richter <robert.richter@xxxxxxx>
---
include/linux/perf_event.h | 1 +
kernel/events/core.c | 179 ++++++++++++++++++++++++++---
tools/perf/Documentation/examples/proto.c | 51 ++++++++
3 files changed, 213 insertions(+), 18 deletions(-)
create mode 100644 tools/perf/Documentation/examples/proto.c

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index e76a410..3c5452e 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -602,6 +602,7 @@ struct pmu {
struct list_head entry;

struct device *dev;
+ struct device *cldev;
char *name;
int type;

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 5e70f62..967203c 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4,7 +4,8 @@
* Copyright (C) 2008 Thomas Gleixner <tglx@xxxxxxxxxxxxx>
* Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
* Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@xxxxxxxxxx>
- * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@xxxxxxxxxxx>
+ * Copyright (C) 2009 Paul Mackerras, IBM Corp. <paulus@xxxxxxxxxxx>
+ * Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter
*
* For licensing details see kernel-base/COPYING
*/
@@ -35,6 +36,7 @@
#include <linux/perf_event.h>
#include <linux/ftrace_event.h>
#include <linux/hw_breakpoint.h>
+#include <linux/cdev.h>

#include "internal.h"

@@ -5510,42 +5512,68 @@ static struct device_attribute pmu_dev_attrs[] = {
__ATTR_NULL,
};

-static int pmu_bus_running;
-static struct bus_type pmu_bus = {
- .name = "event_source",
- .dev_attrs = pmu_dev_attrs,
+static struct pmu_sysfs {
+ int initialized;
+ struct bus_type bus;
+ struct cdev *cdev;
+ unsigned major;
+ struct class *class;
+} pmu_sysfs = {
+ .bus = {
+ .name = "event_source",
+ .dev_attrs = pmu_dev_attrs,
+ },
};

static void pmu_dev_release(struct device *dev)
{
+ struct pmu *pmu = dev_get_drvdata(dev);
+ if (pmu->cldev)
+ device_unregister(pmu->cldev);
kfree(dev);
}

+#define MINORMAX (MINORMASK + 1)
+
static int pmu_dev_alloc(struct pmu *pmu)
{
int ret = -ENOMEM;
+ struct device *dev;
+ struct device *cldev = NULL;

- pmu->dev = kzalloc(sizeof(struct device), GFP_KERNEL);
- if (!pmu->dev)
+ dev = kzalloc(sizeof(struct device), GFP_KERNEL);
+ if (!dev)
goto out;

- device_initialize(pmu->dev);
- ret = dev_set_name(pmu->dev, "%s", pmu->name);
+ device_initialize(dev);
+ ret = dev_set_name(dev, "%s", pmu->name);
if (ret)
goto free_dev;

- dev_set_drvdata(pmu->dev, pmu);
- pmu->dev->bus = &pmu_bus;
- pmu->dev->release = pmu_dev_release;
- ret = device_add(pmu->dev);
+ dev_set_drvdata(dev, pmu);
+ dev->bus = &pmu_sysfs.bus;
+ dev->release = pmu_dev_release;
+ ret = device_add(dev);
if (ret)
goto free_dev;

+ if (pmu_sysfs.class && pmu_sysfs.major && pmu->type < MINORMAX) {
+ cldev = device_create(pmu_sysfs.class, dev,
+ MKDEV(pmu_sysfs.major, pmu->type),
+ NULL, "%s", pmu->name);
+ if (IS_ERR(cldev)) {
+ ret = PTR_ERR(cldev);
+ goto free_dev;
+ }
+ }
+
+ pmu->dev = dev;
+ pmu->cldev = cldev;
out:
return ret;

free_dev:
- put_device(pmu->dev);
+ put_device(dev);
goto out;
}

@@ -5580,7 +5608,7 @@ int perf_pmu_register(struct pmu *pmu, char *name, int type)
}
pmu->type = type;

- if (pmu_bus_running) {
+ if (pmu_sysfs.initialized) {
ret = pmu_dev_alloc(pmu);
if (ret)
goto free_idr;
@@ -5967,6 +5995,38 @@ out:
return ret;
}

+/*
+ * open() handler of the pmu char devices.
+ *
+ * The minor number of the opened node is the pmu->type; remember it in
+ * file->private_data, where perf_set_pmu_type() reads it back later.
+ */
+static int perf_pmu_open(struct inode *inode, struct file *file)
+{
+	unsigned long type = iminor(inode);
+
+	file->private_data = (void *)type;
+
+	return 0;
+}
+
+static const struct file_operations perf_pmu_fops = {	/* ops of the pmu char device; perf_set_pmu_type() also compares f_op against this table to recognize a pmu fd */
+ .owner = THIS_MODULE,
+ .open = perf_pmu_open,	/* stores the minor number in file->private_data */
+};
+
+/*
+ * Translate a file descriptor of an opened pmu char device into the
+ * pmu type it stands for.
+ *
+ * @type: updated with the pmu type on success, left untouched otherwise
+ * @fd:   descriptor to examine
+ *
+ * Returns 0 if @fd refers to a file whose f_op is perf_pmu_fops, and
+ * -EBADF if @fd cannot be looked up or belongs to any other kind of file.
+ */
+static int perf_set_pmu_type(int *type, int fd)
+{
+	int fput_needed;
+	int ret = 0;
+	struct file *file = fget_light(fd, &fput_needed);
+
+	if (!file)
+		return -EBADF;
+
+	/* Only files opened through the pmu char device carry a type. */
+	if (file->f_op != &perf_pmu_fops)
+		ret = -EBADF;
+	else
+		*type = (int)(unsigned long)file->private_data;
+
+	fput_light(file, fput_needed);
+
+	return ret;
+}
+
/**
* sys_perf_event_open - open a performance event, associate it to a task/cpu
*
@@ -6023,7 +6083,7 @@ SYSCALL_DEFINE5(perf_event_open,
if (event_fd < 0)
return event_fd;

- if (group_fd != -1) {
+ if (perf_set_pmu_type(&attr.type, group_fd) && group_fd != -1) {
group_leader = perf_fget_light(group_fd, &fput_needed);
if (IS_ERR(group_leader)) {
err = PTR_ERR(group_leader);
@@ -6885,6 +6945,36 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
return NOTIFY_OK;
}

+static struct pmu perf_proto;
+
+/*
+ * event_init callback of the prototype pmu: accept an event only if its
+ * attr.type equals the type assigned to perf_proto, and log the match.
+ * Returns -ENOENT for events of any other type, 0 otherwise.
+ */
+static int perf_proto_init(struct perf_event *event)
+{
+	struct perf_event_attr *attr = &event->attr;
+
+	if (attr->type != perf_proto.type)
+		return -ENOENT;
+
+	pr_info("Found event %p (config=%016llx) for pmu %s (type=%d) on cpu %d\n",
+		event, attr->config, perf_proto.name, attr->type,
+		event->oncpu);
+
+	return 0;
+}
+
+/*
+ * add callback of the prototype pmu: only logs the event being added.
+ * The flags argument is not used. Always returns 0.
+ */
+static int perf_proto_add(struct perf_event *event, int flags)
+{
+	struct perf_event_attr *attr = &event->attr;
+
+	pr_info("Adding event %p (config=%016llx) to pmu %s (type=%d) on cpu %d\n",
+		event, attr->config, perf_proto.name, attr->type,
+		event->oncpu);
+
+	return 0;
+}
+
+/*
+ * del callback of the prototype pmu: only logs the event being removed.
+ * The flags argument is not used.
+ */
+static void perf_proto_del(struct perf_event *event, int flags)
+{
+	struct perf_event_attr *attr = &event->attr;
+
+	pr_info("Removing event %p (config=%016llx) to pmu %s (type=%d) on cpu %d\n",
+		event, attr->config, perf_proto.name, attr->type,
+		event->oncpu);
+}
+
+static struct pmu perf_proto = {	/* example pmu; perf_event_init() registers it under the name "proto" */
+ .event_init = perf_proto_init,	/* accepts only events whose attr.type matches perf_proto.type */
+ .add = perf_proto_add,	/* logs the event, always returns 0 */
+ .del = perf_proto_del,	/* logs the event */
+};
+
void __init perf_event_init(void)
{
int ret;
@@ -6896,6 +6986,7 @@ void __init perf_event_init(void)
perf_pmu_register(&perf_swevent, "software", PERF_TYPE_SOFTWARE);
perf_pmu_register(&perf_cpu_clock, NULL, -1);
perf_pmu_register(&perf_task_clock, NULL, -1);
+ perf_pmu_register(&perf_proto, "proto", -1);
perf_tp_register();
perf_cpu_notifier(perf_cpu_notify);
register_reboot_notifier(&perf_reboot_notifier);
@@ -6904,6 +6995,55 @@ void __init perf_event_init(void)
WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
}

+/*
+ * devnode callback of the pmu class: name each node
+ * "<class name>/<device name>" so that all pmu nodes share one directory.
+ * The mode argument is not used. Returns the string built by kasprintf().
+ */
+static char *pmu_devnode(struct device *dev, mode_t *mode)
+{
+	const char *dir = dev->class->name;
+	const char *node = dev_name(dev);
+
+	return kasprintf(GFP_KERNEL, "%s/%s", dir, node);
+}
+
+/*
+ * Set up the character device side of the pmu nodes: allocate a cdev
+ * served by perf_pmu_fops, reserve MINORMAX minors with
+ * alloc_chrdev_region(), and create the "pmu" class whose devnode
+ * callback is pmu_devnode(). On success the class, the cdev and the
+ * major number are published in pmu_sysfs.
+ *
+ * Returns 0 on success or a negative errno; on failure everything that
+ * was acquired up to that point is released again.
+ */
+static int __init perf_event_chrdev_init(void)
+{
+	static const char name[] = "pmu";
+	struct class *class;
+	struct cdev *cdev;
+	dev_t devt;
+	int ret;
+
+	cdev = cdev_alloc();
+	if (!cdev)
+		return -ENOMEM;
+
+	ret = alloc_chrdev_region(&devt, 0, MINORMAX, name);
+	if (ret)
+		goto put_cdev;
+
+	cdev->owner = THIS_MODULE;
+	cdev->ops = &perf_pmu_fops;
+	ret = kobject_set_name(&cdev->kobj, "%s", name);
+	if (ret)
+		goto free_region;
+
+	ret = cdev_add(cdev, devt, MINORMAX);
+	if (ret)
+		goto free_region;
+
+	class = class_create(THIS_MODULE, name);
+	if (IS_ERR(class)) {
+		ret = PTR_ERR(class);
+		goto del_cdev;
+	}
+	class->devnode = pmu_devnode;
+
+	pmu_sysfs.class = class;
+	pmu_sysfs.cdev = cdev;
+	pmu_sysfs.major = MAJOR(devt);
+
+	return 0;
+
+del_cdev:
+	/*
+	 * cdev_del() drops the cdev's kobject reference itself, so this
+	 * path must not fall through to the kobject_put() below; doing so
+	 * would put the same reference twice.
+	 */
+	cdev_del(cdev);
+	unregister_chrdev_region(devt, MINORMAX);
+	return ret;
+
+free_region:
+	unregister_chrdev_region(devt, MINORMAX);
+put_cdev:
+	/* The cdev was never added; drop the reference from cdev_alloc(). */
+	kobject_put(&cdev->kobj);
+	return ret;
+}
+
static int __init perf_event_sysfs_init(void)
{
struct pmu *pmu;
@@ -6911,7 +7051,10 @@ static int __init perf_event_sysfs_init(void)

mutex_lock(&pmus_lock);

- ret = bus_register(&pmu_bus);
+ ret = perf_event_chrdev_init();
+ WARN(ret, "Unable to create pmu char device, reason %d\n", ret);
+
+ ret = bus_register(&pmu_sysfs.bus);
if (ret)
goto unlock;

@@ -6922,7 +7065,7 @@ static int __init perf_event_sysfs_init(void)
ret = pmu_dev_alloc(pmu);
WARN(ret, "Failed to register pmu: %s, reason %d\n", pmu->name, ret);
}
- pmu_bus_running = 1;
+ pmu_sysfs.initialized = 1;
ret = 0;

unlock:
diff --git a/tools/perf/Documentation/examples/proto.c b/tools/perf/Documentation/examples/proto.c
new file mode 100644
index 0000000..967260f
--- /dev/null
+++ b/tools/perf/Documentation/examples/proto.c
@@ -0,0 +1,51 @@
+/*
+ * Prototype to attach an event to a specific PMU
+ *
+ * Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter
+ *
+ * Sample code that attaches an event to a specified PMU.
+ *
+ * # ls -l /dev/pmu/proto
+ * crw-rw---- 1 root root 254, 6 Jul 8 2011 /dev/pmu/proto
+ * # dmesg -c > /dev/null
+ * # ./proto
+ * # dmesg -c
+ * Found event ffff88041de71c00 (config=0000000000f00ba2) for pmu proto (type=6) on cpu -1
+ * Adding event ffff88041de71c00 (config=0000000000f00ba2) to pmu proto (type=6) on cpu 1
+ * Removing event ffff88041de71c00 (config=0000000000f00ba2) to pmu proto (type=6) on cpu 1
+ * Adding event ffff88041de71c00 (config=0000000000f00ba2) to pmu proto (type=6) on cpu 1
+ * Removing event ffff88041de71c00 (config=0000000000f00ba2) to pmu proto (type=6) on cpu 1
+ *
+ * Building:
+ *
+ * $ cd linux # Linux kernel source dir
+ * $ make -C tools/perf/Documentation/examples CFLAGS=-I../.. proto
+ */
+
+#include <fcntl.h>
+#include <err.h>
+
+#include "perf.h"
+
+/*
+ * Open the "proto" pmu device node and create one event on it by
+ * passing the pmu's file descriptor in the group fd position of
+ * sys_perf_event_open(). Exits with status 1 (via err()) if the node
+ * cannot be opened or the event cannot be created, 0 otherwise.
+ */
+int main (int argc, char *argv[])
+{
+	struct perf_event_attr attr = { 0 };
+	int pmu, event;
+
+	pmu = open("/dev/pmu/proto", O_RDONLY);
+	if (pmu == -1)
+		err(1, "pmu not found");
+
+	attr.config = 0xf00ba2;
+
+	event = sys_perf_event_open(&attr, 0, -1, pmu, 0);
+	if (event == -1) {
+		/*
+		 * Report right away, before any other call can overwrite
+		 * errno. err() does not return, and the pmu descriptor is
+		 * closed when the process exits.
+		 */
+		err(1, "event creation failed");
+	}
+
+	close(event);
+	close(pmu);
+
+	return 0;
+}
--
1.7.5.3


--
Advanced Micro Devices, Inc.
Operating System Research Center

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/