[RFC][PATCH 5/8] perf/x86/intel: Optimize intel_get_excl_constraints()

From: Peter Zijlstra
Date: Thu Mar 14 2019 - 09:11:53 EST


Avoid the POPCNT that hweight64() expands to by noting we already know
the constraint weight when we start pruning the mask, so we can simply
decrement it for each bit we clear.
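
To see the transformation in isolation, here is a minimal user-space
sketch (must_drop(), the mask value and __builtin_popcountll() are
stand-ins invented for this example for the xlo->state[] tests,
c->idxmsk64 and hweight64(); it is not the kernel code itself):

#include <stdio.h>

/* hypothetical stand-in for the xlo->state[] tests in the patch */
static int must_drop(int bit)
{
	return bit % 2 == 0;	/* arbitrary prune rule for the demo */
}

int main(void)
{
	unsigned long long mask = 0x3fULL;	/* plays the role of c->idxmsk64 */
	int w = __builtin_popcountll(mask);	/* weight is already known up front */
	int i;

	for (i = 0; i < 64; i++) {
		if (!(mask & (1ULL << i)))
			continue;
		if (must_drop(i)) {
			mask &= ~(1ULL << i);	/* __clear_bit() */
			w--;			/* decrement instead of recounting */
		}
	}

	/* the running weight matches a full recount: no trailing POPCNT */
	printf("w=%d popcount=%d\n", w, __builtin_popcountll(mask));
	return 0;
}

Both values print as 3 here; keeping the running count in sync at each
clear is what lets the patch drop the hweight64() recount after the
loop.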

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
arch/x86/events/intel/core.c | 22 +++++++++++++---------
1 file changed, 13 insertions(+), 9 deletions(-)

--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2838,7 +2838,7 @@ intel_get_excl_constraints(struct cpu_hw
 	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
 	struct intel_excl_states *xlo;
 	int tid = cpuc->excl_thread_id;
-	int is_excl, i;
+	int is_excl, i, w;
 
 	/*
 	 * validating a group does not require
@@ -2894,36 +2894,40 @@ intel_get_excl_constraints(struct cpu_hw
 	 * SHARED   : sibling counter measuring non-exclusive event
 	 * UNUSED   : sibling counter unused
 	 */
+	w = c->weight;
 	for_each_set_bit(i, c->idxmsk, X86_PMC_IDX_MAX) {
 		/*
 		 * exclusive event in sibling counter
 		 * our corresponding counter cannot be used
 		 * regardless of our event
 		 */
-		if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE)
+		if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE) {
 			__clear_bit(i, c->idxmsk);
+			w--;
+			continue;
+		}
 		/*
 		 * if measuring an exclusive event, sibling
 		 * measuring non-exclusive, then counter cannot
 		 * be used
 		 */
-		if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED)
+		if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED) {
 			__clear_bit(i, c->idxmsk);
+			w--;
+			continue;
+		}
 	}
 
 	/*
-	 * recompute actual bit weight for scheduling algorithm
-	 */
-	c->weight = hweight64(c->idxmsk64);
-
-	/*
 	 * if we return an empty mask, then switch
 	 * back to static empty constraint to avoid
 	 * the cost of freeing later on
 	 */
-	if (c->weight == 0)
+	if (!w)
 		c = &emptyconstraint;
 
+	c->weight = w;
+
 	return c;
 }
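
One subtlety: when w reaches 0, c is redirected to the static
emptyconstraint before the final c->weight = w store, so that store
then writes into the shared empty constraint rather than the dynamic
one. Assuming emptyconstraint is zero-initialized (its weight already
0), the store is idempotent in that path and the reordering is
harmless.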