[PATCH v3 04/46] perf/x86/intel/cmt: add device initialization and CPU hotplug support

From: David Carrillo-Cisneros
Date: Sat Oct 29 2016 - 20:49:42 EST


Introduce struct pkg_data to store the per-CPU-package locks and data for
the new CMT driver.

Each pkg_data is initialized/terminated on demand when the first/last CPU
in its package goes online/offline.

More details in code's comments.

Signed-off-by: David Carrillo-Cisneros <davidcc@xxxxxxxxxx>
---
arch/x86/events/intel/Makefile | 1 +
arch/x86/events/intel/cmt.c | 268 +++++++++++++++++++++++++++++++++++++++++
arch/x86/events/intel/cmt.h | 29 +++++
include/linux/cpuhotplug.h | 2 +
4 files changed, 300 insertions(+)
create mode 100644 arch/x86/events/intel/cmt.c
create mode 100644 arch/x86/events/intel/cmt.h

diff --git a/arch/x86/events/intel/Makefile b/arch/x86/events/intel/Makefile
index e9d8520..02fecbc 100644
--- a/arch/x86/events/intel/Makefile
+++ b/arch/x86/events/intel/Makefile
@@ -7,3 +7,4 @@ obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel-uncore.o
intel-uncore-objs := uncore.o uncore_nhmex.o uncore_snb.o uncore_snbep.o
obj-$(CONFIG_PERF_EVENTS_INTEL_CSTATE) += intel-cstate.o
intel-cstate-objs := cstate.o
+obj-$(CONFIG_INTEL_RDT_M) += cmt.o
diff --git a/arch/x86/events/intel/cmt.c b/arch/x86/events/intel/cmt.c
new file mode 100644
index 0000000..267a9ec
--- /dev/null
+++ b/arch/x86/events/intel/cmt.c
@@ -0,0 +1,268 @@
+/*
+ * Intel Cache Monitoring Technology (CMT) support.
+ */
+
+#include <linux/slab.h>
+#include <asm/cpu_device_id.h>
+#include "cmt.h"
+#include "../perf_event.h"
+
+/* Global lock; held while a package's pkg_data is created or destroyed. */
+static DEFINE_MUTEX(cmt_mutex);
+
+/*
+ * cmt hw units to bytes. Taken from boot_cpu_data in cmt_alloc(), where a
+ * value of 0 is replaced by 1.
+ */
+static unsigned int cmt_l3_scale;
+
+/*
+ * Minimum max_rmid across all pkgs. Reset to UINT_MAX in cmt_start() and
+ * lowered by alloc_pkg_data() for every package that gets initialized.
+ */
+static unsigned int __min_max_rmid;
+
+/*
+ * Array of packages (array of pkgds), indexed by logical package id and
+ * sized by topology_max_packages(). A NULL entry means the package has no
+ * pkg_data. It's protected by RCU or cmt_mutex.
+ */
+static struct pkg_data **cmt_pkgs_data;
+
+/*
+ * Iterate over the packages that currently have a pkg_data.
+ *
+ * @pkgd: pkg_data returned by the previous call, or NULL to start over.
+ *
+ * Returns the first non-NULL entry of cmt_pkgs_data located after @pkgd's
+ * slot (or starting at slot 0 when @pkgd is NULL), or NULL if there is none.
+ *
+ * Must be called inside an RCU read-side critical section or with cmt_mutex
+ * held.
+ */
+static struct pkg_data *cmt_pkgs_data_next_rcu(struct pkg_data *pkgd)
+{
+	u16 nr_pkgs = topology_max_packages();
+	u16 p = pkgd ? pkgd->pkgid + 1 : 0;
+	struct pkg_data *next = NULL;
+
+	/*
+	 * Always scan for the next populated slot, including on the first
+	 * call: slot 0 may be empty while later packages are populated, and
+	 * returning it blindly would end the iteration prematurely.
+	 */
+	while (!next && p < nr_pkgs) {
+		next = rcu_dereference_check(cmt_pkgs_data[p++],
+					     lockdep_is_held(&cmt_mutex));
+	}
+
+	return next;
+}
+
+/* Release the memory of @pkg_data; it performs no locking or unpublishing. */
+static void free_pkg_data(struct pkg_data *pkg_data)
+{
+ kfree(pkg_data);
+}
+
+/*
+ * Allocate and fill in a pkg_data for the package that @cpu belongs to.
+ *
+ * The structure is allocated on @cpu's NUMA node, @cpu becomes its initial
+ * work_cpu, and __min_max_rmid is lowered if this package has a smaller
+ * max_rmid.
+ *
+ * Returns the new pkg_data, ERR_PTR(-ENXIO) if @cpu reports an LLC scale
+ * that disagrees with cmt_l3_scale, or ERR_PTR(-ENOMEM) if the allocation
+ * fails.
+ */
+static struct pkg_data *alloc_pkg_data(int cpu)
+{
+	struct cpuinfo_x86 *info = &cpu_data(cpu);
+	int node = cpu_to_node(cpu);
+	u16 pkg = topology_logical_package_id(cpu);
+	struct pkg_data *fresh;
+
+	/*
+	 * The only tolerated mismatch is a CPU reporting 0 while the global
+	 * scale is 1: 0 scale must have been converted to 1 automatically.
+	 */
+	if (info->x86_cache_occ_scale != cmt_l3_scale &&
+	    (info->x86_cache_occ_scale || cmt_l3_scale != 1)) {
+		pr_err("Multiple LLC scale values, disabling CMT support.\n");
+		return ERR_PTR(-ENXIO);
+	}
+
+	fresh = kzalloc_node(sizeof(*fresh), GFP_KERNEL, node);
+	if (!fresh)
+		return ERR_PTR(-ENOMEM);
+
+	fresh->pkgid = pkg;
+	fresh->work_cpu = cpu;
+	fresh->max_rmid = info->x86_cache_max_rmid;
+
+	/* Keep track of the smallest max_rmid seen so far. */
+	__min_max_rmid = min(__min_max_rmid, fresh->max_rmid);
+
+	return fresh;
+}
+
+/*
+ * Tear down @pkgd. Currently this only frees it through free_pkg_data().
+ * The caller must hold cmt_mutex (checked by lockdep).
+ */
+static void __terminate_pkg_data(struct pkg_data *pkgd)
+{
+ lockdep_assert_held(&cmt_mutex);
+
+ free_pkg_data(pkgd);
+}
+
+/*
+ * Create the pkg_data for @cpu's package and publish it in cmt_pkgs_data.
+ *
+ * The caller must hold cmt_mutex.
+ *
+ * Returns 0 on success, -EPERM (with a one-time warning) if the package
+ * already has a pkg_data, or the error reported by alloc_pkg_data().
+ */
+static int init_pkg_data(int cpu)
+{
+	u16 slot = topology_logical_package_id(cpu);
+	struct pkg_data *fresh;
+
+	lockdep_assert_held(&cmt_mutex);
+
+	/* Verify that this package's slot isn't already populated. */
+	if (WARN_ON_ONCE(cmt_pkgs_data[slot]))
+		return -EPERM;
+
+	fresh = alloc_pkg_data(cpu);
+	if (!IS_ERR(fresh)) {
+		rcu_assign_pointer(cmt_pkgs_data[slot], fresh);
+		synchronize_rcu();
+		return 0;
+	}
+
+	return PTR_ERR(fresh);
+}
+
+/*
+ * Startup callback registered for CPUHP_AP_PERF_X86_CMT_ONLINE.
+ *
+ * If @cpu's package has no valid work_cpu (work_cpu >= nr_cpu_ids), @cpu
+ * takes over that role. Always returns 0.
+ */
+static int intel_cmt_hp_online_enter(unsigned int cpu)
+{
+	u16 slot = topology_logical_package_id(cpu);
+	struct pkg_data *cur;
+
+	rcu_read_lock();
+	cur = rcu_dereference(cmt_pkgs_data[slot]);
+	/* Leave an already assigned work_cpu untouched. */
+	if (!(cur->work_cpu < nr_cpu_ids))
+		cur->work_cpu = cpu;
+	rcu_read_unlock();
+
+	return 0;
+}
+
+/*
+ * Teardown callback registered for CPUHP_AP_PERF_X86_CMT_ONLINE.
+ *
+ * If @cpu is the work_cpu of its package, hand the role to whatever
+ * cpumask_any_but() picks from topology_core_cpumask(@cpu) excluding @cpu.
+ * The result is stored as-is; intel_cmt_prep_down() treats a work_cpu that
+ * is >= nr_cpu_ids as "no CPU left in the package". Always returns 0.
+ */
+static int intel_cmt_hp_online_exit(unsigned int cpu)
+{
+	u16 slot = topology_logical_package_id(cpu);
+	struct pkg_data *cur;
+
+	rcu_read_lock();
+	cur = rcu_dereference(cmt_pkgs_data[slot]);
+	if (cur->work_cpu == cpu) {
+		/* The departing CPU owned the role: pick a replacement. */
+		cur->work_cpu = cpumask_any_but(topology_core_cpumask(cpu),
+						cpu);
+	}
+	rcu_read_unlock();
+
+	return 0;
+}
+
+/*
+ * Startup callback registered for CPUHP_PERF_X86_CMT_PREP.
+ *
+ * Make sure @cpu's package has a pkg_data, creating it under cmt_mutex if
+ * the slot is still empty.
+ *
+ * Returns 0 if the pkg_data already exists or was created, otherwise the
+ * error returned by init_pkg_data().
+ */
+static int intel_cmt_prep_up(unsigned int cpu)
+{
+	u16 slot = topology_logical_package_id(cpu);
+	int ret;
+
+	mutex_lock(&cmt_mutex);
+	if (rcu_dereference_protected(cmt_pkgs_data[slot],
+				      lockdep_is_held(&cmt_mutex)))
+		ret = 0;
+	else
+		ret = init_pkg_data(cpu);
+	mutex_unlock(&cmt_mutex);
+
+	return ret;
+}
+
+/*
+ * Teardown callback registered for CPUHP_PERF_X86_CMT_PREP.
+ *
+ * When @cpu's package has no work_cpu left (work_cpu >= nr_cpu_ids, as set
+ * by intel_cmt_hp_online_exit() when no replacement CPU was found), the
+ * package's pkg_data is unpublished and destroyed. Always returns 0.
+ */
+static int intel_cmt_prep_down(unsigned int cpu)
+{
+	struct pkg_data *pkgd;
+	u16 pkgid = topology_logical_package_id(cpu);
+
+	mutex_lock(&cmt_mutex);
+	pkgd = rcu_dereference_protected(cmt_pkgs_data[pkgid],
+					 lockdep_is_held(&cmt_mutex));
+	/* Defensive: nothing to tear down if the slot is already empty. */
+	if (pkgd && pkgd->work_cpu >= nr_cpu_ids) {
+		/*
+		 * Unpublish first, then wait for every RCU reader that may
+		 * still hold the old pointer, and only then free the memory.
+		 * Freeing before the grace period would let a concurrent
+		 * reader dereference freed memory.
+		 */
+		RCU_INIT_POINTER(cmt_pkgs_data[pkgid], NULL);
+		synchronize_rcu();
+		__terminate_pkg_data(pkgd);
+	}
+	mutex_unlock(&cmt_mutex);
+
+	return 0;
+}
+
+/*
+ * CPU match table checked by intel_cmt_init(): Intel CPUs that advertise
+ * X86_FEATURE_CMT_OCCUP_LLC. The empty entry terminates the table.
+ */
+static const struct x86_cpu_id intel_cmt_match[] = {
+ { .vendor = X86_VENDOR_INTEL, .feature = X86_FEATURE_CMT_OCCUP_LLC },
+ {}
+};
+
+/*
+ * Free the cmt_pkgs_data array and reset the pointer to NULL.
+ * Only the array itself is freed here; the pkg_data entries it may point
+ * to are not released by this function.
+ */
+static void cmt_dealloc(void)
+{
+ kfree(cmt_pkgs_data);
+ cmt_pkgs_data = NULL;
+}
+
+/*
+ * Set up the driver's global state: record the LLC scale of the boot CPU
+ * (treating 0 as 1) and allocate the zero-initialized cmt_pkgs_data array
+ * with one slot per possible package.
+ *
+ * Returns 0 on success or -ENOMEM if the array cannot be allocated.
+ */
+static int __init cmt_alloc(void)
+{
+	unsigned int boot_scale = boot_cpu_data.x86_cache_occ_scale;
+
+	cmt_l3_scale = boot_scale ? boot_scale : 1;
+
+	cmt_pkgs_data = kcalloc(topology_max_packages(),
+				sizeof(*cmt_pkgs_data), GFP_KERNEL);
+
+	return cmt_pkgs_data ? 0 : -ENOMEM;
+}
+
+/*
+ * Register the CMT CPU hotplug callbacks.
+ *
+ * The PREP state creates/destroys the per-package pkg_data and the ONLINE
+ * state maintains each package's work_cpu. If registering the ONLINE state
+ * fails, the PREP state is removed again.
+ *
+ * Returns 0 on success or the error returned by cpuhp_setup_state().
+ */
+static int __init cmt_start(void)
+{
+	int err;
+
+	/*
+	 * Will be lowered by alloc_pkg_data() as packages get initialized
+	 * from intel_cmt_prep_up().
+	 */
+	__min_max_rmid = UINT_MAX;
+	err = cpuhp_setup_state(CPUHP_PERF_X86_CMT_PREP,
+				"PERF_X86_CMT_PREP",
+				intel_cmt_prep_up,
+				intel_cmt_prep_down);
+	if (err)
+		return err;
+
+	err = cpuhp_setup_state(CPUHP_AP_PERF_X86_CMT_ONLINE,
+				"AP_PERF_X86_CMT_ONLINE",
+				intel_cmt_hp_online_enter,
+				intel_cmt_hp_online_exit);
+	if (err)
+		cpuhp_remove_state(CPUHP_PERF_X86_CMT_PREP);
+
+	/*
+	 * No string copy of cmt_l3_scale is made here: nothing in this file
+	 * consumes one, so allocating it would only leak memory. Allocate it
+	 * together with the code that actually uses it.
+	 */
+	return err;
+}
+
+/*
+ * Driver entry point, run as a device initcall.
+ *
+ * Checks that the CPU matches intel_cmt_match, allocates the global state,
+ * registers the hotplug callbacks and finally logs the RMID count of every
+ * package that has a pkg_data together with the LLC scale.
+ *
+ * Returns 0 on success, -ENODEV if the CPU does not match, or the error
+ * returned by cmt_alloc()/cmt_start(). Every failure is logged.
+ */
+static int __init intel_cmt_init(void)
+{
+	struct pkg_data *pkgd = NULL;
+	int err = 0;
+
+	if (!x86_match_cpu(intel_cmt_match)) {
+		err = -ENODEV;
+		goto err_exit;
+	}
+
+	err = cmt_alloc();
+	if (err)
+		goto err_exit;
+
+	err = cmt_start();
+	if (err)
+		goto err_dealloc;
+
+	pr_info("Intel CMT enabled with ");
+	rcu_read_lock();
+	while ((pkgd = cmt_pkgs_data_next_rcu(pkgd))) {
+		/* max_rmid is unsigned (u32), so print it with %u. */
+		pr_cont("%u RMIDs for pkg %d, ",
+			pkgd->max_rmid + 1, pkgd->pkgid);
+	}
+	rcu_read_unlock();
+	/* cmt_l3_scale is an unsigned int, so print it with %u as well. */
+	pr_cont("and l3 scale of %u KBs.\n", cmt_l3_scale);
+
+	return err;
+
+err_dealloc:
+	cmt_dealloc();
+err_exit:
+	pr_err("Intel CMT registration failed with error: %d\n", err);
+	return err;
+}
+
+device_initcall(intel_cmt_init);
diff --git a/arch/x86/events/intel/cmt.h b/arch/x86/events/intel/cmt.h
new file mode 100644
index 0000000..8c16797
--- /dev/null
+++ b/arch/x86/events/intel/cmt.h
@@ -0,0 +1,29 @@
+/*
+ * Intel Cache Monitoring Technology (CMT) support.
+ * (formerly Intel Cache QoS Monitoring, CQM)
+ *
+ *
+ * Locking
+ *
+ * One global cmt_mutex. One mutex and spin_lock per package.
+ * cmt_pkgs_data is RCU protected.
+ *
+ * Rules:
+ * - cmt_mutex: Hold for CMT init/terminate, event init/terminate,
+ * cgroup start/stop.
+ */
+
+/**
+ * struct pkg_data - Per-package CMT data.
+ *
+ * @work_cpu: CPU to run rotation and other batch jobs.
+ * It must be in the package associated to its
+ * instance of pkg_data. A value >= nr_cpu_ids
+ * means that no CPU currently holds this role.
+ * @max_rmid: Max rmid valid for CPUs in this package. Copied
+ * from the CPU's x86_cache_max_rmid when the
+ * pkg_data is allocated.
+ * @pkgid: The logical package id for this pkgd. It is also
+ * the index of this pkgd in cmt_pkgs_data.
+ */
+struct pkg_data {
+ unsigned int work_cpu;
+ u32 max_rmid;
+ u16 pkgid;
+};
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 320a3be..604660a 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -11,6 +11,7 @@ enum cpuhp_state {
CPUHP_PERF_X86_UNCORE_PREP,
CPUHP_PERF_X86_AMD_UNCORE_PREP,
CPUHP_PERF_X86_RAPL_PREP,
+ CPUHP_PERF_X86_CMT_PREP,
CPUHP_PERF_BFIN,
CPUHP_PERF_POWER,
CPUHP_PERF_SUPERH,
@@ -110,6 +111,7 @@ enum cpuhp_state {
CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE,
CPUHP_AP_PERF_X86_AMD_POWER_ONLINE,
CPUHP_AP_PERF_X86_RAPL_ONLINE,
+ CPUHP_AP_PERF_X86_CMT_ONLINE,
CPUHP_AP_PERF_X86_CSTATE_ONLINE,
CPUHP_AP_PERF_S390_CF_ONLINE,
CPUHP_AP_PERF_S390_SF_ONLINE,
--
2.8.0.rc3.226.g39d4020