[PATCH v3 14/46] perf/x86/intel/cmt: add Active and Dep_{Idle, Dirty} states

From: David Carrillo-Cisneros
Date: Sat Oct 29 2016 - 20:46:25 EST


Add remaining states for pmonr's state machine:
- Active: A pmonr that is actively used.
- Dep_Idle: A pmonr that failed to obtain a rmid. It "borrows" its rmid
from its lowest monitored (Active in same pkgd) ancestor in the
monr hierarchy.
- Dep_Dirty: A pmonr that was Active but has lost its rmid (due to rmid
rotation, introduced later in this patch series). It is similar to
Dep_Idle but keeps track of its former rmid in case there is a reuse
opportunity in the future.

This patch adds states, states transition functions for pmonrs.
It also adds infrastructure and usage statistics to struct pkg_data that
will be used later in this series.

The transitions Unused -> Active and Unused -> Dep_Idle are inline because
they will be called during task context switch the first time a monr
runs in a package (later in this series).

More details in code's comments.

Signed-off-by: David Carrillo-Cisneros <davidcc@xxxxxxxxxx>
---
arch/x86/events/intel/cmt.c | 237 ++++++++++++++++++++++++++++++++++++++++++++
arch/x86/events/intel/cmt.h | 95 +++++++++++++++++-
2 files changed, 329 insertions(+), 3 deletions(-)

diff --git a/arch/x86/events/intel/cmt.c b/arch/x86/events/intel/cmt.c
index fb6877f..86c3013 100644
--- a/arch/x86/events/intel/cmt.c
+++ b/arch/x86/events/intel/cmt.c
@@ -142,6 +142,10 @@ static struct pmonr *pmonr_alloc(struct pkg_data *pkgd)
if (!pmonr)
return ERR_PTR(-ENOMEM);

+ /* pmonr_deps_head and pmonr_deps_entry share a union; initialize one of them. */
+ INIT_LIST_HEAD(&pmonr->pmonr_deps_head);
+ INIT_LIST_HEAD(&pmonr->pkgd_deps_entry);
+ INIT_LIST_HEAD(&pmonr->rot_entry);
pmonr_set_rmids(pmonr, INVALID_RMID, INVALID_RMID);
pmonr->pkgd = pkgd;

@@ -153,9 +157,108 @@ static inline bool monr_is_root(struct monr *monr)
return monr_hrchy_root == monr;
}

+/*
+ * monr_hrchy_is_ancestor() - Test ancestry in the monr hierarchy.
+ * Return: true if @a is an ancestor of @b or equal to it.
+ */
+static inline bool monr_hrchy_is_ancestor(struct monr *a, struct monr *b)
+{
+ if (monr_hrchy_root == a || a == b)
+ return true;
+ if (monr_hrchy_root == b)
+ return false;
+
+ b = b->parent;
+ /* Break at the root */
+ while (b != monr_hrchy_root) {
+ if (a == b)
+ return true;
+ b = b->parent;
+ }
+
+ return false;
+}
+
+/**
+ * pmonr_find_lma() - Find Lowest Monitored Ancestor (lma) of a pmonr.
+ * @pmonr: The pmonr to start the search on.
+ *
+ * Always succeeds since pmonrs in monr_hrchy_root are always in Active state.
+ * Return: lma of @pmonr.
+ */
+static struct pmonr *pmonr_find_lma(struct pmonr *pmonr)
+{
+ struct monr *monr = pmonr->monr;
+ struct pkg_data *pkgd = pmonr->pkgd;
+
+ lockdep_assert_held(&pkgd->lock);
+
+ while ((monr = monr->parent)) {
+ /* protected by pkgd lock. */
+ pmonr = pkgd_pmonr(pkgd, monr);
+ if (pmonr->state == PMONR_ACTIVE)
+ return pmonr;
+ }
+ /* Should have hit monr_hrchy_root. */
+ WARN_ON_ONCE(true);
+
+ return pkgd_pmonr(pkgd, monr_hrchy_root);
+}
+
+/**
+ * pmonr_move_all_dependants() - Move all dependants from @old lender to @new.
+ * @old: Old lender.
+ * @new: New lender.
+ *
+ * @new->monr must be ancestor of @old->monr and they must be distinct.
+ */
+static void pmonr_move_all_dependants(struct pmonr *old, struct pmonr *new)
+{
+ struct pmonr *dep;
+ union pmonr_rmids dep_rmids, new_rmids;
+
+ new_rmids.value = atomic64_read(&new->atomic_rmids);
+ /* Update this pmonr's dependants to depend on new lender. */
+ list_for_each_entry(dep, &old->pmonr_deps_head, pmonr_deps_entry) {
+ dep->lender = new;
+ dep_rmids.value = atomic64_read(&dep->atomic_rmids);
+ pmonr_set_rmids(dep, new_rmids.sched_rmid, dep_rmids.read_rmid);
+ }
+ list_splice_tail_init(&old->pmonr_deps_head, &new->pmonr_deps_head);
+}
+
+/**
+ * pmonr_move_dependants() - Move some dependants from @old lender to @new.
+ *
+ * Move @old's dependants that are @new->monr descendants to be @new's
+ * dependants. As opposed to pmonr_move_all_dependants, @new->monr does not
+ * need to be an ancestor of @old->monr.
+ */
+static inline void pmonr_move_dependants(struct pmonr *old, struct pmonr *new)
+{
+ struct pmonr *dep, *tmp;
+ union pmonr_rmids dep_rmids, new_rmids;
+
+ new_rmids.value = atomic64_read(&new->atomic_rmids);
+
+ list_for_each_entry_safe(dep, tmp, &old->pmonr_deps_head,
+ pmonr_deps_entry) {
+ if (!monr_hrchy_is_ancestor(new->monr, dep->monr))
+ continue;
+ list_move_tail(&dep->pmonr_deps_entry, &new->pmonr_deps_head);
+ dep->lender = new;
+ dep_rmids.value = atomic64_read(&dep->atomic_rmids);
+ pmonr_set_rmids(dep, new_rmids.sched_rmid, dep_rmids.read_rmid);
+ }
+}
+
/* pkg_data lock is not required for transition from Off state. */
static void pmonr_to_unused(struct pmonr *pmonr)
{
+ struct pkg_data *pkgd = pmonr->pkgd;
+ struct pmonr *lender;
+ union pmonr_rmids rmids;
+
/*
* Do not warn on re-entering Unused state to simplify cleanup
* of initialized pmonrs that were not scheduled.
@@ -168,6 +271,98 @@ static void pmonr_to_unused(struct pmonr *pmonr)
pmonr_set_rmids(pmonr, INVALID_RMID, 0);
return;
}
+
+ lockdep_assert_held(&pkgd->lock);
+ rmids.value = atomic64_read(&pmonr->atomic_rmids);
+
+ if (pmonr->state == PMONR_ACTIVE) {
+ if (monr_is_root(pmonr->monr)) {
+ WARN_ON_ONCE(!list_empty(&pmonr->pmonr_deps_head));
+ } else {
+ lender = pmonr_find_lma(pmonr);
+ pmonr_move_all_dependants(pmonr, lender);
+ }
+ __set_bit(rmids.read_rmid, pkgd->dirty_rmids);
+
+ } else if (pmonr->state == PMONR_DEP_IDLE ||
+ pmonr->state == PMONR_DEP_DIRTY) {
+
+ pmonr->lender = NULL;
+ list_del_init(&pmonr->pmonr_deps_entry);
+ list_del_init(&pmonr->pkgd_deps_entry);
+
+ if (pmonr->state == PMONR_DEP_DIRTY)
+ __set_bit(rmids.read_rmid, pkgd->dirty_rmids);
+ else
+ pkgd->nr_dep_pmonrs--;
+ } else {
+ WARN_ON_ONCE(true);
+ return;
+ }
+
+ list_del_init(&pmonr->rot_entry);
+ pmonr->state = PMONR_UNUSED;
+ pmonr_set_rmids(pmonr, INVALID_RMID, INVALID_RMID);
+}
+
+static inline void __pmonr_to_active_helper(struct pmonr *pmonr, u32 rmid)
+{
+ struct pkg_data *pkgd = pmonr->pkgd;
+
+ list_move_tail(&pmonr->rot_entry, &pkgd->active_pmonrs);
+ pmonr->state = PMONR_ACTIVE;
+ pmonr_set_rmids(pmonr, rmid, rmid);
+ atomic64_set(&pmonr->last_enter_active, get_jiffies_64());
+}
+
+static inline void pmonr_unused_to_active(struct pmonr *pmonr, u32 rmid)
+{
+ struct pmonr *lender;
+
+ __clear_bit(rmid, pmonr->pkgd->free_rmids);
+ __pmonr_to_active_helper(pmonr, rmid);
+ /*
+ * If monr is root, no ancestor exists to move pmonr to. If monr is
+ * root's child, no dependants of its parent (root) could be moved.
+ * Check both cases separately to avoid unnecessary calls to
+ * pmonr_move_dependants.
+ */
+ if (!monr_is_root(pmonr->monr) && !monr_is_root(pmonr->monr->parent)) {
+ lender = pmonr_find_lma(pmonr);
+ pmonr_move_dependants(lender, pmonr);
+ }
+}
+
+/* helper function for transitions to Dep_{Idle,Dirty} states. */
+static inline void __pmonr_to_dep_helper(
+ struct pmonr *pmonr, struct pmonr *lender, u32 read_rmid)
+{
+ struct pkg_data *pkgd = pmonr->pkgd;
+ union pmonr_rmids lender_rmids;
+
+ pmonr->lender = lender;
+ list_move_tail(&pmonr->pmonr_deps_entry, &lender->pmonr_deps_head);
+ list_move_tail(&pmonr->pkgd_deps_entry, &pkgd->dep_pmonrs);
+
+ if (read_rmid == INVALID_RMID) {
+ list_move_tail(&pmonr->rot_entry, &pkgd->dep_idle_pmonrs);
+ pkgd->nr_dep_pmonrs++;
+ pmonr->state = PMONR_DEP_IDLE;
+ } else {
+ list_move_tail(&pmonr->rot_entry, &pkgd->dep_dirty_pmonrs);
+ pmonr->state = PMONR_DEP_DIRTY;
+ }
+
+ lender_rmids.value = atomic64_read(&lender->atomic_rmids);
+ pmonr_set_rmids(pmonr, lender_rmids.sched_rmid, read_rmid);
+}
+
+static inline void pmonr_unused_to_dep_idle(struct pmonr *pmonr)
+{
+ struct pmonr *lender;
+
+ lender = pmonr_find_lma(pmonr);
+ __pmonr_to_dep_helper(pmonr, lender, INVALID_RMID);
}

static void pmonr_unused_to_off(struct pmonr *pmonr)
@@ -176,6 +371,43 @@ static void pmonr_unused_to_off(struct pmonr *pmonr)
pmonr_set_rmids(pmonr, INVALID_RMID, 0);
}

+static void pmonr_active_to_dep_dirty(struct pmonr *pmonr)
+{
+ struct pmonr *lender;
+ union pmonr_rmids rmids;
+
+ lender = pmonr_find_lma(pmonr);
+ pmonr_move_all_dependants(pmonr, lender);
+
+ rmids.value = atomic64_read(&pmonr->atomic_rmids);
+ __pmonr_to_dep_helper(pmonr, lender, rmids.read_rmid);
+}
+
+static void __pmonr_dep_to_active_helper(struct pmonr *pmonr, u32 rmid)
+{
+ list_del_init(&pmonr->pkgd_deps_entry);
+ /* pmonr will no longer be dependent on pmonr_lender. */
+ list_del_init(&pmonr->pmonr_deps_entry);
+ pmonr_move_dependants(pmonr->lender, pmonr);
+ pmonr->lender = NULL;
+ __pmonr_to_active_helper(pmonr, rmid);
+}
+
+static void pmonr_dep_idle_to_active(struct pmonr *pmonr, u32 rmid)
+{
+ __clear_bit(rmid, pmonr->pkgd->free_rmids);
+ pmonr->pkgd->nr_dep_pmonrs--;
+ __pmonr_dep_to_active_helper(pmonr, rmid);
+}
+
+static void pmonr_dep_dirty_to_active(struct pmonr *pmonr)
+{
+ union pmonr_rmids rmids;
+
+ rmids.value = atomic64_read(&pmonr->atomic_rmids);
+ __pmonr_dep_to_active_helper(pmonr, rmids.read_rmid);
+}
+
static void monr_dealloc(struct monr *monr)
{
u16 p, nr_pkgs = topology_max_packages();
@@ -780,6 +1012,11 @@ static struct pkg_data *alloc_pkg_data(int cpu)
pkgd->max_rmid = CMT_MAX_NR_RMIDS - 1;
}

+ INIT_LIST_HEAD(&pkgd->active_pmonrs);
+ INIT_LIST_HEAD(&pkgd->dep_idle_pmonrs);
+ INIT_LIST_HEAD(&pkgd->dep_dirty_pmonrs);
+ INIT_LIST_HEAD(&pkgd->dep_pmonrs);
+
mutex_init(&pkgd->mutex);
raw_spin_lock_init(&pkgd->lock);

diff --git a/arch/x86/events/intel/cmt.h b/arch/x86/events/intel/cmt.h
index 05325c8..bf90c26 100644
--- a/arch/x86/events/intel/cmt.h
+++ b/arch/x86/events/intel/cmt.h
@@ -36,6 +36,21 @@
* online cpu. The pmonr handles the CMT and MBM monitoring within its package
* by managing the rmid to write into each CPU that runs a monitored thread.
*
+ * The lma of a pmonr is its closest ancestor pmonr that is in Active state.
+ *
+ * A pmonr allocates a rmid when needed, depending of its state (see
+ * enum pmonr_state comments). If a pmonr fails to obtain a free rmid, it
+ * "borrows" the one used by its Lowest Monitored Ancestor (lma).
+ *
+ * The "borrowed" rmid is used when threads are scheduled in so that the
+ * occupancy and memory bandwidth for those threads is accounted for in the
+ * monr hierarchy. Yet, that pmonr cannot use a "borrowed" rmid to read,
+ * since that rmid is not counting the "borrower"'s monr cache events.
+ * Therefore, a pmonr uses rmids in two ways:
+ * (1) to schedule, and (2) to read.
+ * When a pmonr owns a rmid (Active state), that rmid is used for both
+ * schedule and read.
+ *
*
* Locking
*
@@ -56,6 +71,16 @@
* - Off: pmonr is unavailable for monitoring. It's the starting state.
* - Unused: pmonr is available for monitoring but no thread associated to
* this pmonr's monr has been scheduled in this pmonr's package.
+ * - Active: pmonr is actively used. It successfully obtained a free rmid
+ * to sched in/out and uses it to read pmonr's llc_occupancy.
+ * - Dep_Idle: pmonr failed to obtain its own free rmid and is borrowing the
+ * rmid from its lowest Active ancestor monr (its lma monr).
+ * - Dep_Dirty: pmonr was Active but its rmid was stolen. This state differs
+ * from Dep_Idle in that the pmonr keeps a reference to its
+ * former Active rmid. If the pmonr becomes eligible to recoup
+ * its rmid in the near future, this previously used rmid can
+ * be reused even if "dirty" without introducing additional
+ * counting error.
*
* The valid state transitions are:
*
@@ -64,11 +89,37 @@
* Off | Unused monitoring is enabled for a pmonr.
*-----------------------------------------------------------------------------
* Unused | Off monitoring is disabled for a pmonr.
+ * |--------------------------------------------------------------
+ * | Active First thread associated to pmonr is scheduled
+ * | in package and a free rmid is available.
+ * |--------------------------------------------------------------
+ * | Dep_Idle Could not find a free rmid available.
+ *-----------------------------------------------------------------------------
+ * Active | Dep_Dirty rmid is stolen, keep reference to old rmid
+ * | in read_rmid, but do not use it to read.
+ * |--------------------------------------------------------------
+ * | Unused pmonr releases the rmid, released rmid can be
+ * | "dirty" and therefore goes to dirty_rmids.
+ *-----------------------------------------------------------------------------
+ * Dep_Idle | Active pmonr receives a "clean" rmid.
+ * |--------------------------------------------------------------
+ * | Unused pmonr is no longer waiting for rmid.
+ *-----------------------------------------------------------------------------
+ * Dep_Dirty | Active dirty rmid is reissued to pmonr that had it
+ * | before the transition.
+ * |--------------------------------------------------------------
+ * | Dep_Idle dirty rmid has become "clean" and is reissued
+ * | to a distinct pmonr (or go to free_rmids).
+ * |--------------------------------------------------------------
+ * | Unused pmonr is no longer waiting for rmid.
*-----------------------------------------------------------------------------
*/
enum pmonr_state {
PMONR_OFF = 0,
PMONR_UNUSED,
+ PMONR_ACTIVE,
+ PMONR_DEP_IDLE,
+ PMONR_DEP_DIRTY,
};

/**
@@ -81,11 +132,11 @@ enum pmonr_state {
* Its values can also used to atomically read the state (preventing
* unnecessary locks of pkgd->lock) in the following way:
* pmonr state
- * | Off Unused
+ * | Off Unused Active Dep_Idle Dep_Dirty
* ============================================================================
- * sched_rmid | INVALID_RMID INVALID_RMID
+ * sched_rmid | INVALID_RMID INVALID_RMID valid lender's lender's
* ----------------------------------------------------------------------------
- * read_rmid | INVALID_RMID 0
+ * read_rmid | INVALID_RMID 0 (same) INVALID_RMID old rmid
*
*/
union pmonr_rmids {
@@ -98,16 +149,42 @@ union pmonr_rmids {

/**
+ * struct pmonr - per-package component of MONitored Resources (monr).
+ * @lender: if in Dep_Idle or Dep_Dirty state, it's the pmonr that
+ * lends its rmid to this pmonr. NULL otherwise.
+ * @pmonr_deps_head: List of pmonrs in Dep_Idle or Dep_Dirty state that
+ * borrow their sched_rmid from this pmonr.
+ * @pmonr_deps_entry: Entry into lender's @pmonr_deps_head when in Dep_Idle
+ * or Dep_Dirty state.
+ * @pkgd_deps_entry: When in Dep_Dirty state, the list entry for dep_pmonrs.
* @monr: The monr that contains this pmonr.
* @pkgd: The package data associated with this pmonr.
+ * @rot_entry: List entry to attach to pmonr rotation lists in
+ * pkg_data.
+ *
+ * @last_enter_active: Time last enter Active state.
* @atomic_rmids: Atomic accesor for this pmonr_rmids.
* @state: The state for this pmonr, note that this can also
* be inferred from the combination of sched_rmid and
* read_rmid in @atomic_rmids.
*/
struct pmonr {
+ struct pmonr *lender;
+ /* save space with union since pmonr is in only one state at a time. */
+ union{
+ struct { /* variables for Active state. */
+ struct list_head pmonr_deps_head;
+ };
+ struct { /* variables for Dep_Idle and Dep_Dirty states. */
+ struct list_head pmonr_deps_entry;
+ struct list_head pkgd_deps_entry;
+ };
+ };
+
struct monr *monr;
struct pkg_data *pkgd;
+ struct list_head rot_entry;
+
+ atomic64_t last_enter_active;

/* all writers are sync'ed by package's lock. */
atomic64_t atomic_rmids;
@@ -130,7 +207,13 @@ struct pmonr {
* @free_rmids: Pool of free rmids.
* @dirty_rmids: Pool of "dirty" rmids that are not referenced
* by a pmonr.
+ * @active_pmonrs: LRU of Active pmonrs.
+ * @dep_idle_pmonrs: LRU of Dep_Idle pmonrs.
+ * @dep_dirty_pmonrs: LRU of Dep_Dirty pmonrs.
+ * @dep_pmonrs: LRU of Dep_Idle and Dep_Dirty pmonrs.
+ * @nr_dep_pmonrs: nr of Dep_Idle pmonrs (NOTE: only Dep_Idle transitions update it; confirm whether Dep_Dirty should also count).
* @mutex: Hold when modifying this pkg_data.
+ * @mutex_key: lockdep class for pkg_data's mutex.
* @lock: Hold to protect pmonrs in this pkg_data.
* @work_cpu: CPU to run rotation and other batch jobs.
* It must be in the package associated to its
@@ -142,6 +225,12 @@ struct pkg_data {
unsigned long free_rmids[CMT_MAX_NR_RMIDS_LONGS];
unsigned long dirty_rmids[CMT_MAX_NR_RMIDS_LONGS];

+ struct list_head active_pmonrs;
+ struct list_head dep_idle_pmonrs;
+ struct list_head dep_dirty_pmonrs;
+ struct list_head dep_pmonrs;
+ int nr_dep_pmonrs;
+
struct mutex mutex;
raw_spinlock_t lock;

--
2.8.0.rc3.226.g39d4020