[RFC 1/4] drivers/nvdimm: Add perf interface to expose nvdimm performance stats

From: Kajol Jain
Date: Wed May 12 2021 - 14:45:38 EST


Patch adds performance stats reporting support for nvdimm.
Added interface includes support for a pmu register function and
callbacks to be used by the arch/platform specific drivers.
User could use the standard perf tool to access perf events exposed
via pmu.

A structure is added called nvdimm_pmu which can be used to add
platform specific data like supported events and callbacks to pmu
functions like event_init/add/delete/read. It also adds
unregister_nvdimm_pmu function to handle unregistering of a pmu device.

Signed-off-by: Kajol Jain <kjain@xxxxxxxxxxxxx>
---
drivers/nvdimm/Makefile | 1 +
drivers/nvdimm/nd_perf.c | 111 +++++++++++++++++++++++++++++++++++++++
include/linux/nd.h | 31 +++++++++++
3 files changed, 143 insertions(+)
create mode 100644 drivers/nvdimm/nd_perf.c

diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile
index 29203f3d3069..25dba6095612 100644
--- a/drivers/nvdimm/Makefile
+++ b/drivers/nvdimm/Makefile
@@ -18,6 +18,7 @@ nd_e820-y := e820.o
libnvdimm-y := core.o
libnvdimm-y += bus.o
libnvdimm-y += dimm_devs.o
+libnvdimm-y += nd_perf.o
libnvdimm-y += dimm.o
libnvdimm-y += region_devs.o
libnvdimm-y += region.o
diff --git a/drivers/nvdimm/nd_perf.c b/drivers/nvdimm/nd_perf.c
new file mode 100644
index 000000000000..d28bec2b61a2
--- /dev/null
+++ b/drivers/nvdimm/nd_perf.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * nd_perf.c: NVDIMM Device Performance Monitoring Unit support
+ *
+ * Perf interface to expose nvdimm performance stats.
+ *
+ * Copyright (C) 2021 IBM Corporation
+ */
+
+#define pr_fmt(fmt) "nvdimm_pmu: " fmt
+
+#include <linux/nd.h>
+
+#define to_nvdimm_pmu(_pmu) container_of(_pmu, struct nvdimm_pmu, pmu)
+
+static int nvdimm_pmu_event_init(struct perf_event *event)
+{
+ struct nvdimm_pmu *nd_pmu = to_nvdimm_pmu(event->pmu);
+
+ /* test the event attr type for PMU enumeration */
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ /* it does not support event sampling mode */
+ if (is_sampling_event(event))
+ return -EINVAL;
+
+ /* no branch sampling */
+ if (has_branch_stack(event))
+ return -EOPNOTSUPP;
+
+ /* jump to arch/platform specific callbacks if any */
+ if (nd_pmu && nd_pmu->event_init)
+ return nd_pmu->event_init(event, nd_pmu->dev);
+
+ return 0;
+}
+
+static void nvdimm_pmu_read(struct perf_event *event)
+{
+ struct nvdimm_pmu *nd_pmu = to_nvdimm_pmu(event->pmu);
+
+ /* jump to arch/platform specific callbacks if any */
+ if (nd_pmu && nd_pmu->read)
+ nd_pmu->read(event, nd_pmu->dev);
+}
+
+static void nvdimm_pmu_del(struct perf_event *event, int flags)
+{
+ struct nvdimm_pmu *nd_pmu = to_nvdimm_pmu(event->pmu);
+
+ /* jump to arch/platform specific callbacks if any */
+ if (nd_pmu && nd_pmu->del)
+ nd_pmu->del(event, flags, nd_pmu->dev);
+}
+
+static int nvdimm_pmu_add(struct perf_event *event, int flags)
+{
+ struct nvdimm_pmu *nd_pmu = to_nvdimm_pmu(event->pmu);
+
+ if (flags & PERF_EF_START)
+ /* jump to arch/platform specific callbacks if any */
+ if (nd_pmu && nd_pmu->add)
+ return nd_pmu->add(event, flags, nd_pmu->dev);
+ return 0;
+}
+
+int register_nvdimm_pmu(struct nvdimm_pmu *nd_pmu, struct platform_device *pdev)
+{
+ int rc;
+
+ if (!nd_pmu || !pdev)
+ return -EINVAL;
+
+ nd_pmu->pmu.task_ctx_nr = perf_invalid_context;
+ nd_pmu->pmu.event_init = nvdimm_pmu_event_init;
+ nd_pmu->pmu.add = nvdimm_pmu_add;
+ nd_pmu->pmu.del = nvdimm_pmu_del;
+ nd_pmu->pmu.read = nvdimm_pmu_read;
+ nd_pmu->pmu.name = nd_pmu->name;
+ nd_pmu->pmu.attr_groups = nd_pmu->attr_groups;
+ nd_pmu->pmu.capabilities = PERF_PMU_CAP_NO_INTERRUPT |
+ PERF_PMU_CAP_NO_EXCLUDE;
+
+ /*
+ * Adding platform_device->dev pointer to nvdimm_pmu, so that we can
+ * access that device data in PMU callbacks and also pass it to
+ * arch/platform specific code.
+ */
+ nd_pmu->dev = &pdev->dev;
+
+ rc = perf_pmu_register(&nd_pmu->pmu, nd_pmu->name, -1);
+ if (rc)
+ return rc;
+
+ pr_info("%s NVDIMM performance monitor support registered\n",
+ nd_pmu->name);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(register_nvdimm_pmu);
+
+void unregister_nvdimm_pmu(struct pmu *nd_pmu)
+{
+ /*
+ * nd_pmu will get free in arch/platform specific code once
+ * corresponding pmu get unregistered.
+ */
+ perf_pmu_unregister(nd_pmu);
+}
+EXPORT_SYMBOL_GPL(unregister_nvdimm_pmu);
diff --git a/include/linux/nd.h b/include/linux/nd.h
index ee9ad76afbba..fa6e60b2b368 100644
--- a/include/linux/nd.h
+++ b/include/linux/nd.h
@@ -8,6 +8,8 @@
#include <linux/ndctl.h>
#include <linux/device.h>
#include <linux/badblocks.h>
+#include <linux/platform_device.h>
+#include <linux/perf_event.h>

enum nvdimm_event {
NVDIMM_REVALIDATE_POISON,
@@ -23,6 +25,35 @@ enum nvdimm_claim_class {
NVDIMM_CCLASS_UNKNOWN,
};

+/**
+ * struct nvdimm_pmu - data structure for nvdimm perf driver
+ *
+ * @name: name of the nvdimm pmu device.
+ * @pmu: pmu data structure for nvdimm performance stats.
+ * @cpu: designated cpu for counter access.
+ * @dev: nvdimm device pointer.
+ * @functions(event_init/add/del/read): platform specific callbacks.
+ * @attr_groups: data structure for events/formats/cpumask.
+ * @node: node for cpu hotplug notifier link.
+ * @cpuhp_state: state for cpu hotplug notification.
+ */
+struct nvdimm_pmu {
+ const char *name;
+ struct pmu pmu;
+ int cpu;
+ struct device *dev;
+ int (*event_init)(struct perf_event *event, struct device *dev);
+ int (*add)(struct perf_event *event, int flags, struct device *dev);
+ void (*del)(struct perf_event *event, int flags, struct device *dev);
+ void (*read)(struct perf_event *event, struct device *dev);
+ const struct attribute_group **attr_groups;
+ struct hlist_node node;
+ enum cpuhp_state cpuhp_state;
+};
+
+int register_nvdimm_pmu(struct nvdimm_pmu *nvdimm, struct platform_device *pdev);
+void unregister_nvdimm_pmu(struct pmu *pmu);
+
struct nd_device_driver {
struct device_driver drv;
unsigned long type;
--
2.27.0