[PATCH v3 31/46] perf/x86/intel/cmt: add subtree read for cgroup events

From: David Carrillo-Cisneros
Date: Sat Oct 29 2016 - 20:45:14 EST


An llc_occupancy read for a cgroup event must read llc_occupancy for all
monrs at or below the event's monr.

The cgroup monr's pmonr must have a valid rmid for the read to be
meaningful. Descendant pmonrs that do not have a valid read_rmid are
skipped, since their occupancy is already included in their Lowest
Monitored Ancestor (lma) pmonr's occupancy.
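
The aggregation rule can be pictured with a toy model (plain C with
hypothetical types, for illustration only; the real implementation is
pmonr_read_subtree() below): occupancy is charged to the lowest ancestor
that owns an rmid, so a subtree read only sums the nodes that hold their
own rmid.

#include <stdio.h>

/* Toy stand-in for a monr: a node either owns a read rmid or is charged
 * to its Lowest Monitored Ancestor (the nearest ancestor owning one). */
struct toy_monr {
	int owns_rmid;			/* 1 if this node has its own read rmid */
	unsigned long long occupancy;	/* bytes counted against that rmid */
	struct toy_monr *child, *sibling;
};

/* Sum llc_occupancy over a subtree: nodes without their own rmid are
 * skipped because their usage already shows up in their lma's rmid. */
static unsigned long long toy_read_subtree(const struct toy_monr *root)
{
	unsigned long long total = root->owns_rmid ? root->occupancy : 0;
	const struct toy_monr *c;

	for (c = root->child; c; c = c->sibling)
		total += toy_read_subtree(c);
	return total;
}

int main(void)
{
	struct toy_monr leaf = { .owns_rmid = 0 };	/* charged to parent */
	struct toy_monr mid  = { .owns_rmid = 1, .occupancy = 4096, .child = &leaf };
	struct toy_monr root = { .owns_rmid = 1, .occupancy = 8192, .child = &mid };

	printf("%llu\n", toy_read_subtree(&root));	/* prints 12288 */
	return 0;
}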

Signed-off-by: David Carrillo-Cisneros <davidcc@xxxxxxxxxx>
---
arch/x86/events/intel/cmt.c | 113 +++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 111 insertions(+), 2 deletions(-)

diff --git a/arch/x86/events/intel/cmt.c b/arch/x86/events/intel/cmt.c
index f9195ec..275d128 100644
--- a/arch/x86/events/intel/cmt.c
+++ b/arch/x86/events/intel/cmt.c
@@ -1359,6 +1359,110 @@ static struct monr *monr_next_descendant_post(struct monr *pos,
return pos->parent;
}

+static int read_subtree_rmids(u32 root_r, unsigned long *rmids_bm, u64 *total)
+{
+	u64 val;
+	int r, err;
+
+	/* first iteration reads root's rmid. */
+	r = root_r;
+	do {
+		if (r != INVALID_RMID) {
+			err = cmt_rmid_read(r, &val);
+			if (WARN_ON_ONCE(err))
+				return err;
+			(*total) += val;
+		}
+		if (!rmids_bm)
+			break;
+		if (root_r != INVALID_RMID) {
+			root_r = INVALID_RMID;
+			r = find_first_bit(rmids_bm, CMT_MAX_NR_RMIDS);
+		} else {
+			r = find_next_bit(rmids_bm, CMT_MAX_NR_RMIDS, r + 1);
+		}
+	} while (r < CMT_MAX_NR_RMIDS);
+
+	return 0;
+}
+
+/**
+ * pmonr_read_subtree() - Read occupancy for a pmonr subtree.
+ *
+ * Read and add occupancy for all pmonrs in the subtree rooted at
+ * @root_pmonr->monr and in @root_pmonr->pkgd's package.
+ * Fast-path for the common case of a leaf pmonr; otherwise, a best-effort
+ * two-stage read:
+ * 1) read all rmids in the subtree with pkgd->lock held, and
+ * 2) read and add occupancy for those rmids without any locks held.
+ */
+static int pmonr_read_subtree(struct pmonr *root_pmonr, u64 *total)
+{
+	struct monr *pos;
+	struct pkg_data *pkgd = root_pmonr->pkgd;
+	struct pmonr *pmonr;
+	union pmonr_rmids rmids;
+	int err = 0, root_r;
+	unsigned long flags, *rmids_bm = NULL;
+
+	*total = 0;
+	rmids.value = atomic64_read(&root_pmonr->atomic_rmids);
+	/*
+	 * The root of the subtree must be in Unused or Active state for the
+	 * read to be meaningful (Unused pmonrs have zero occupancy), yet its
+	 * descendants can be in Dep_{Idle,Dirty} state, since those states
+	 * use their Lowest Monitored Ancestor's rmid.
+	 */
+	if (rmids.sched_rmid == INVALID_RMID) {
+		/* Unused state. */
+		if (rmids.read_rmid == 0)
+			root_r = INVALID_RMID;
+		else
+			/* Off state. */
+			return -ENODATA;
+	} else {
+		/* Dep_{Idle, Dirty} state. */
+		if (rmids.sched_rmid != rmids.read_rmid)
+			return -ENODATA;
+		/* Active state. */
+		root_r = rmids.read_rmid;
+	}
+	/*
+	 * Lock-less fast-path for the common case of a childless monr. No need
+	 * to lock for list_empty() since either path leads to a read that is
+	 * correct at some time close to the moment the check happens.
+	 */
+	if (list_empty(&root_pmonr->monr->children))
+		goto read_rmids;
+
+	rmids_bm = kzalloc(CMT_MAX_NR_RMIDS_BYTES, GFP_ATOMIC);
+	if (!rmids_bm)
+		return -ENOMEM;
+
+	/* Lock to protect against changes in the pmonr hierarchy. */
+	raw_spin_lock_irqsave(&pkgd->lock, flags);
+
+	/* Start at the subtree's first child. */
+	pos = root_pmonr->monr;
+	while ((pos = monr_next_descendant_pre(pos, root_pmonr->monr))) {
+		/* Protected by pkgd->lock. */
+		pmonr = pkgd_pmonr(pkgd, pos);
+		rmids.value = atomic64_read(&pmonr->atomic_rmids);
+		/* Exclude all pmonrs not in Active or Dep_Dirty states. */
+		if (rmids.sched_rmid == INVALID_RMID ||
+		    rmids.read_rmid == INVALID_RMID)
+			continue;
+		__set_bit(rmids.read_rmid, rmids_bm);
+	}
+
+	raw_spin_unlock_irqrestore(&pkgd->lock, flags);
+read_rmids:
+	err = read_subtree_rmids(root_r, rmids_bm, total);
+	kfree(rmids_bm);
+
+	return err;
+}
+
/* Issue reads to CPUs in remote packages. */
static int issue_read_remote_pkgs(struct monr *monr,
struct cmt_csd **issued_ccsds,
@@ -1522,8 +1626,13 @@ static int intel_cmt_event_read(struct perf_event *event)
/* It's a task event. */
err = read_all_pkgs(monr, CMT_IPI_WAIT_TIME, &count);
} else {
-		/* To add support in next patches in series */
-		return -ENOTSUPP;
+		struct pmonr *pmonr;
+
+		/* It's either a cgroup or a cpu event. */
+		rcu_read_lock();
+		pmonr = rcu_dereference(monr->pmonrs[pkgid]);
+		err = pmonr_read_subtree(pmonr, &count);
+		rcu_read_unlock();
}
if (err)
return err;
--
2.8.0.rc3.226.g39d4020