Re: [PATCH 09/12] x86/cqm: Add RMID reuse

From: Shivappa Vikas
Date: Tue Jan 17 2017 - 19:47:56 EST




On Tue, 17 Jan 2017, Thomas Gleixner wrote:

On Fri, 6 Jan 2017, Vikas Shivappa wrote:
+static void cqm_schedule_rmidwork(int domain);

This forward declaration is required because all callers of that function
are coming _after_ the function implementation, right?

+static inline bool is_first_cqmwork(int domain)
+{
+ return (!atomic_cmpxchg(&cqm_pkgs_data[domain]->reuse_scheduled, 0, 1));

What's the purpose of these outer brackets? Enhanced readability?

Will fix the coding style and the ordering of the function declarations; a sketch follows the quoted hunk below.


+}
+
static void __put_rmid(u32 rmid, int domain)
{
struct cqm_rmid_entry *entry;
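For reference, the planned shape (a sketch only: the helper moves above its first caller so the forward declaration can go away, and the redundant parentheses are dropped):

static inline bool is_first_cqmwork(int domain)
{
	return !atomic_cmpxchg(&cqm_pkgs_data[domain]->reuse_scheduled, 0, 1);
}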
@@ -294,6 +301,93 @@ static void cqm_mask_call(struct rmid_read *rr)
static unsigned int __intel_cqm_threshold;
static unsigned int __intel_cqm_max_threshold;

+/*
+ * Test whether an RMID has a zero occupancy value on this cpu.

This tests whether the occupancy value is less than
__intel_cqm_threshold. Unless I'm missing something, the value can be set by
user space and is therefore not necessarily zero.

Your commentary is really useful: It's either wrong or superfluous or
non-existent.

Will fix, yes, it's the user-configurable value; a possible rewording of the comment follows the quoted function below.


+ */
+static void intel_cqm_stable(void)
+{
+ struct cqm_rmid_entry *entry;
+ struct list_head *llist;
+
+ llist = &cqm_pkgs_data[pkg_id]->cqm_rmid_limbo_lru;
+ list_for_each_entry(entry, llist, list) {
+
+ if (__rmid_read(entry->rmid) < __intel_cqm_threshold)
+ entry->state = RMID_AVAILABLE;
+ }
+}
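A possible rewording of the comment (tentative):

/*
 * Test whether an RMID's occupancy on this CPU has dropped below the
 * user-configurable __intel_cqm_threshold, i.e. whether the RMID can
 * leave the limbo list and be recycled.
 */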
+
+static void __intel_cqm_rmid_reuse(void)
+{
+ struct cqm_rmid_entry *entry, *tmp;
+ struct list_head *llist, *flist;
+ struct pkg_data *pdata;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&cache_lock, flags);
+ pdata = cqm_pkgs_data[pkg_id];
+ llist = &pdata->cqm_rmid_limbo_lru;
+ flist = &pdata->cqm_rmid_free_lru;
+
+ if (list_empty(llist))
+ goto end;
+ /*
+ * Test whether an RMID is free
+ */
+ intel_cqm_stable();
+
+ list_for_each_entry_safe(entry, tmp, llist, list) {
+
+ if (entry->state == RMID_DIRTY)
+ continue;
+ /*
+ * Otherwise remove from limbo and place it onto the free list.
+ */
+ list_del(&entry->list);
+ list_add_tail(&entry->list, flist);

This is really a performance-optimized implementation. Iterate the limbo
list first and check the occupancy value, conditionally update the state
and then iterate the same list and conditionally move the entries which got
just updated to the free list. Of course all of this happens with
interrupts disabled and a global lock held to enhance it further.

Will fix this. This can use the per-package lock instead, since the global cache_lock protects the event list. Also yes, we don't need two loops now that this is per package; a rough sketch follows the quoted hunk below.


+ }
+
+end:
+ raw_spin_unlock_irqrestore(&cache_lock, flags);
+}
+
+static bool reschedule_cqm_work(void)
+{
+ unsigned long flags;
+ bool nwork = false;
+
+ raw_spin_lock_irqsave(&cache_lock, flags);
+
+ if (!list_empty(&cqm_pkgs_data[pkg_id]->cqm_rmid_limbo_lru))
+ nwork = true;
+ else
+ atomic_set(&cqm_pkgs_data[pkg_id]->reuse_scheduled, 0U);

0U because the 'val' argument of atomic_set() is 'int', right?

Will fix.


+ raw_spin_unlock_irqrestore(&cache_lock, flags);
+
+ return nwork;
+}
+
+static void cqm_schedule_rmidwork(int domain)
+{
+ struct delayed_work *dwork;
+ unsigned long delay;
+
+ dwork = &cqm_pkgs_data[domain]->intel_cqm_rmid_work;
+ delay = msecs_to_jiffies(RMID_DEFAULT_QUEUE_TIME);
+
+ schedule_delayed_work_on(cqm_pkgs_data[domain]->rmid_work_cpu,
+ dwork, delay);
+}
+
+static void intel_cqm_rmid_reuse(struct work_struct *work)

Your naming conventions are really random: cqm_*, intel_cqm_*, and then
totally random function names. Is there any deeper thought involved, or is it
just what it looks like: random?

+{
+ __intel_cqm_rmid_reuse();
+
+ if (reschedule_cqm_work())
+ cqm_schedule_rmidwork(pkg_id);

Great stuff. You first try to move the limbo RMIDs to the free list and
then you reevaluate the limbo list again. Thereby acquiring and dropping
the global cache lock several times.

Dammit, the stupid reuse function already knows whether the list is empty
or not. But returning that information would make too much sense.

Will fix. We don't need the global lock either; the direction I'm thinking of is sketched below the quoted function.


+}
+
static struct pmu intel_cqm_pmu;

static u64 update_sample(unsigned int rmid, u32 evt_type, int first)
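The direction I'm thinking of (sketch only): have __intel_cqm_rmid_reuse() return !list_empty() on the limbo list, computed under the per-package lock it already holds, and clear reuse_scheduled there once the list has drained. reschedule_cqm_work() and its extra lock round trip then go away:

static void intel_cqm_rmid_reuse(struct work_struct *work)
{
	/* __intel_cqm_rmid_reuse() returns true if limbo entries remain. */
	if (__intel_cqm_rmid_reuse())
		cqm_schedule_rmidwork(pkg_id);
}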
@@ -548,6 +642,8 @@ static int intel_cqm_setup_event(struct perf_event *event,
if (!event->hw.cqm_rmid)
return -ENOMEM;

+ cqm_assign_rmid(event, event->hw.cqm_rmid);

Oh, so now there is a second user which calls cqm_assign_rmid(). Finally it
might actually do something. Whether that something is useful and
functional, I seriously doubt it.

+
return 0;
}

@@ -863,6 +959,7 @@ static void intel_cqm_event_terminate(struct perf_event *event)
{
struct perf_event *group_other = NULL;
unsigned long flags;
+ int d;

mutex_lock(&cache_mutex);
/*
@@ -905,6 +1002,13 @@ static void intel_cqm_event_terminate(struct perf_event *event)
mbm_stop_timers();

mutex_unlock(&cache_mutex);
+
+ for (d = 0; d < cqm_socket_max; d++) {
+
+ if (cqm_pkgs_data[d] != NULL && is_first_cqmwork(d)) {
+ cqm_schedule_rmidwork(d);
+ }

Let's look at the logic of this.

When the event terminates, then you unconditionally schedule the rmid work
on ALL packages whether the event was freed or not. Really brilliant.

There is no reason to schedule anything if the event was never using any
rmid on a given package or if the RMIDs are not freed because there is a
new group leader.

Will fix so that the work is not scheduled if the event did not use the package; a sketch follows the quoted hunk below.


The NOHZ FULL people will love that extra work activity for nothing.

Also, the detection of whether the work is already scheduled is intuitive as
usual: is_first_cqmwork()? What???? !cqmwork_scheduled() would be too
simple to understand, right?

Oh well.

+ }
}

static int intel_cqm_event_init(struct perf_event *event)
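Sketch of the terminate path I have in mind; event_used_pkg() is a placeholder for the per-package usage tracking to be added in the next version, and the scheduled test is renamed along the lines you suggest:

	for (d = 0; d < cqm_socket_max; d++) {
		/* Skip packages this event never had an RMID on. */
		if (!cqm_pkgs_data[d] || !event_used_pkg(event, d))
			continue;
		/* cqmwork_scheduled() is the renamed cmpxchg test. */
		if (!cqmwork_scheduled(d))
			cqm_schedule_rmidwork(d);
	}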
@@ -1322,6 +1426,9 @@ static int pkg_data_init_cpu(int cpu)
mutex_init(&pkg_data->pkg_data_mutex);
raw_spin_lock_init(&pkg_data->pkg_data_lock);

+ INIT_DEFERRABLE_WORK(
+ &pkg_data->intel_cqm_rmid_work, intel_cqm_rmid_reuse);

Stop this crappy formatting.

INIT_DEFERRABLE_WORK(&pkg_data->intel_cqm_rmid_work,
intel_cqm_rmid_reuse);

is the canonical way to do line breaks. Is it really that hard?

Will fix the formatting.

Thanks,
Vikas


Thanks,

tglx