[PATCH v3 42/46] perf/x86/intel/cmt: add rmid stealing

From: David Carrillo-Cisneros
Date: Sat Oct 29 2016 - 20:41:45 EST


Add rmid rotation code to steal an rmid whenever not enough
pmonrs are being reactivated.

More details in code's comments.

Signed-off-by: David Carrillo-Cisneros <davidcc@xxxxxxxxxx>
---
arch/x86/events/intel/cmt.c | 149 ++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 144 insertions(+), 5 deletions(-)

diff --git a/arch/x86/events/intel/cmt.c b/arch/x86/events/intel/cmt.c
index ba82f95..e677511 100644
--- a/arch/x86/events/intel/cmt.c
+++ b/arch/x86/events/intel/cmt.c
@@ -1368,6 +1368,106 @@ static int try_activate_dep_dirty_pmonrs(struct pkg_data *pkgd)
return nr_reused;
}

+/**
+ * can_steal_rmid() - Tell if this pmonr's rmid can be stolen.
+ * @pmonr: The pmonr whose rmid is a candidate for stealing.
+ *
+ * A pmonr's "rmid cycle" starts when its rmid is stolen while Active and
+ * completes when it receives a rmid again. A monr "rmid recoup" occurs when
+ * all its non Off/Unused pmonrs that need a rmid have one.
+ *
+ * A pmonr's rmid can be stolen if any of:
+ * 1) No other pmonr in pmonr's monr has been stolen before, or
+ * 2) Some pmonrs have had rmids stolen but rmids for all pmonrs have been
+ *    recovered (rmid recoup) and kept for at least
+ *    __cmt_pre_mon_slice + __cmt_min_mon_slice time, or
+ * 3) At least one of the pmonrs with pkgid smaller than @pmonr's has not
+ *    completed its first "rmid cycle". Once this condition is false, the
+ *    pmonr will have completed its last "rmid cycle" and stealing will no
+ *    longer be allowed. This guarantees that the last "rmid cycle" of a
+ *    pmonr occurs in pkgid order, preventing rmid deadlocks, and that all
+ *    pmonrs eventually have a last "rmid cycle", recovering all rmids.
+ *
+ * Return: true if @pmonr's rmid may be stolen, false otherwise.
+ */
+static bool can_steal_rmid(struct pmonr *pmonr)
+{
+	union pmonr_rmids rmids;
+	struct monr *monr = pmonr->monr;
+	struct pkg_data *pkgd = NULL;
+	struct pmonr *pos_pmonr;
+	bool need_rmid_state;
+	u64 last_all_active, next_steal_time, last_pmonr_active;
+
+	last_all_active = atomic64_read(&monr->last_rmid_recoup);
+	/*
+	 * monr->nr_dep_pmonrs is zero: only conditions 1 and 2 apply.
+	 * NOTE(review): assumes recoup time and slices are in jiffies.
+	 */
+	if (!atomic_read(&monr->nr_dep_pmonrs)) {
+		/* Check steal condition 1. */
+		if (!last_all_active)
+			return true;
+		next_steal_time = last_all_active +
+			__cmt_pre_mon_slice + __cmt_min_mon_slice;
+		/* Condition 2: too early while next_steal_time is ahead. */
+		if (time_after64(next_steal_time, get_jiffies_64()))
+			return false;
+
+		return true;
+	}
+
+	rcu_read_lock();
+
+	/* Check for steal condition 3 without locking. */
+	while ((pkgd = cmt_pkgs_data_next_rcu(pkgd))) {
+		/* To avoid deadlocks, wait for pmonr in pkgid order. */
+		if (pkgd->pkgid >= pmonr->pkgd->pkgid)
+			break;
+		pos_pmonr = pkgd_pmonr(pkgd, monr);
+		rmids.value = atomic64_read(&pos_pmonr->atomic_rmids);
+		last_pmonr_active = atomic64_read(
+				&pos_pmonr->last_enter_active);
+
+		/* pmonrs in Dep_{Idle,Dirty} states are waiting for a rmid. */
+		need_rmid_state = rmids.sched_rmid != INVALID_RMID &&
+				  rmids.sched_rmid != rmids.read_rmid;
+
+		/* Waiting pmonr that entered Active at/after last recoup. */
+		if (need_rmid_state && last_all_active <= last_pmonr_active) {
+			rcu_read_unlock();
+
+			return true;
+		}
+	}
+	rcu_read_unlock();
+
+	return false;
+}
+
+/*
+ * Steal rmids from Active pmonrs of @pkgd, stopping once @max_to_steal have
+ * been stolen. Returns the number stolen; @max_to_steal == 0 sets no limit.
+ */
+static int try_steal_active_pmonrs(struct pkg_data *pkgd,
+				   unsigned int max_to_steal)
+{
+	struct pmonr *cur, *next;
+	unsigned long flags;
+	int nr_stolen = 0;
+
+	raw_spin_lock_irqsave(&pkgd->lock, flags);
+	list_for_each_entry_safe(cur, next, &pkgd->active_pmonrs, rot_entry) {
+		if (can_steal_rmid(cur)) {
+			pmonr_active_to_dep_dirty(cur);
+			if (++nr_stolen == max_to_steal)
+				break;
+		}
+	}
+	raw_spin_unlock_irqrestore(&pkgd->lock, flags);
+	return nr_stolen;
+}
+
static inline int __try_use_free_rmid(struct pkg_data *pkgd, u32 rmid)
{
struct pmonr *pmonr;
@@ -1485,9 +1585,17 @@ static int try_free_dirty_rmids(struct pkg_data *pkgd,
* @pkgd: The package data to rotate rmids on.
* @active_goal: Target min nr of pmonrs to put in Active state.
* @max_dirty_thld: Upper bound for dirty_thld, in CMT cache units.
+ * @max_dirty_goal: Max nr of rmids to leave dirty, waiting to drop
+ * occupancy.
+ * @dirty_cushion: nr of rmids to try to leave in dirty on top of the
+ * nr of pmonrs that need rmid (Dep_Idle), in case
+ * some dirty rmids do not drop occupancy fast enough.
*
* The goals for each iteration of rotation logic are:
* 1) to activate @active_goal pmonrs.
+ * 2) if any pmonr is waiting for rmid (Dep_Idle), to steal enough rmids to
+ * meet its dirty_goal. The dirty_goal is an estimate of the number of dirty
+ * rmids required so that next call reaches its @active_goal.
*
* In order to activate Dep_{Dirty,Idle} pmonrs, rotation logic:
* 1) activate eligible Dep_Dirty pmonrs: These pmonrs can reuse their former
@@ -1503,12 +1611,14 @@ static int try_free_dirty_rmids(struct pkg_data *pkgd,
* rmid.
*/
static int __intel_cmt_rmid_rotate(struct pkg_data *pkgd,
- unsigned int active_goal, unsigned int max_dirty_thld)
+ unsigned int active_goal, unsigned int max_dirty_thld,
+ unsigned int max_dirty_goal, unsigned int dirty_cushion)
{
unsigned int dirty_thld = 0, min_dirty, nr_activated;
- unsigned int nr_dep_pmonrs;
+ unsigned int nr_to_steal, nr_stolen;
+ unsigned int nr_dirty, dirty_goal, nr_dep_pmonrs;
unsigned long flags, *rmids_bm = NULL;
- bool do_active_goal, read_dirty = true, dirty_is_max;
+ bool do_active_goal, do_dirty_goal, read_dirty = true, dirty_is_max;

lockdep_assert_held(&pkgd->mutex);

@@ -1534,6 +1644,7 @@ static int __intel_cmt_rmid_rotate(struct pkg_data *pkgd,

raw_spin_lock_irqsave(&pkgd->lock, flags);
nr_activated += __try_use_free_rmids(pkgd);
+ nr_dirty = pkgd->nr_dirty_rmids;
nr_dep_pmonrs = pkgd->nr_dep_pmonrs;
raw_spin_unlock_irqrestore(&pkgd->lock, flags);

@@ -1544,14 +1655,27 @@ static int __intel_cmt_rmid_rotate(struct pkg_data *pkgd,
dirty_is_max = dirty_thld >= max_dirty_thld;
do_active_goal = nr_activated < active_goal && !dirty_is_max;

+ dirty_goal = min(max_dirty_goal, nr_dep_pmonrs + dirty_cushion);
+ do_dirty_goal = nr_dirty < dirty_goal;
+
/*
* Since Dep_Dirty pmonrs have their own dirty rmid, only Dep_Idle
* pmonrs are waiting for a rmid to be available. Stop if no pmonr
* wait for rmid or no goals to pursue.
*/
- if (!nr_dep_pmonrs || !do_active_goal)
+ if (!nr_dep_pmonrs || (!do_dirty_goal && !do_active_goal))
goto exit;

+ if (do_dirty_goal) {
+ nr_to_steal = dirty_goal - nr_dirty;
+ nr_stolen = try_steal_active_pmonrs(pkgd, nr_to_steal);
+ /*
+ * Stealing from Active pmonrs was already attempted above, so
+ * it makes no sense to reattempt.
+ */
+ max_dirty_goal = 0;
+ }
+
/*
* Try to activate more pmonrs by increasing the dirty threshold.
* Using the minimum observed occupancy in dirty rmids guarantees to
@@ -1633,6 +1757,7 @@ static void intel_cmt_rmid_rotation_work(struct work_struct *work)
/* not precise elapsed time, but good enough for rotation purposes. */
unsigned int elapsed_ms = intel_cmt_pmu.hrtimer_interval_ms;
unsigned int active_goal, max_dirty_threshold;
+ unsigned int dirty_cushion, max_dirty_goal;

pkgd = container_of(to_delayed_work(work),
struct pkg_data, rotation_work);
@@ -1649,7 +1774,21 @@ static void intel_cmt_rmid_rotation_work(struct work_struct *work)
active_goal = max(1u, (elapsed_ms * __cmt_min_progress_rate) / 1000);
max_dirty_threshold = READ_ONCE(__cmt_max_threshold) / cmt_l3_scale;

- __intel_cmt_rmid_rotate(pkgd, active_goal, max_dirty_threshold);
+ /*
+ * Upper bound for the nr of rmids to be dirty in order to have a good
+ * chance of finding enough rmids in next iteration of rotation logic.
+ */
+ max_dirty_goal = min(active_goal + 1, (pkgd->max_rmid + 1) / 4);
+
+ /*
+ * Nr of extra rmids to put in dirty in case some don't drop occupancy.
+ * To be calculated in a sensible manner once statistics about rmid
+ * recycling rate are in place.
+ */
+ dirty_cushion = 2;
+
+ __intel_cmt_rmid_rotate(pkgd, active_goal, max_dirty_threshold,
+ max_dirty_goal, dirty_cushion);

if (intel_cmt_need_rmid_rotation(pkgd))
__intel_cmt_schedule_rotation_for_pkg(pkgd);
--
2.8.0.rc3.226.g39d4020