Re: [PATCH 08/10] perf_event: Optimize the constraint searching bits

From: Stephane Eranian
Date: Fri Jan 22 2010 - 11:08:20 EST


On Fri, Jan 22, 2010 at 4:50 PM, Peter Zijlstra <a.p.zijlstra@xxxxxxxxx> wrote:
> Instead of copying bitmasks around, pass pointers to the constraint structure.
>
The reason I changed that is the AMD situation, where constraints
are generated dynamically, so we need to get some storage from somewhere.
See my AMD NB patch.
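
To make that concrete, here is a minimal sketch of the problem (the
names, e.g. dyn_constraint and amd_get_nb_constraints, are illustrative
only, not the actual code from my patch): once a constraint is computed
at runtime, the returned pointer must reference storage that outlives
the call, e.g. per-CPU state in cpu_hw_events, since returning the
address of a stack-local bitmask would be a bug.

    /*
     * Hypothetical sketch: persistent storage for a constraint
     * that is built on the fly (the AMD northbridge case).
     */
    struct cpu_hw_events {
            /* ... existing fields ... */
            struct event_constraint dyn_constraint;
    };

    static struct event_constraint *
    amd_get_nb_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
    {
            struct event_constraint *c = &cpuc->dyn_constraint;

            /*
             * Which counters are allowed depends on which NB counters
             * happen to be free right now.
             */
            bitmap_zero(c->idxmsk, X86_PMC_IDX_MAX);
            /* ... set bits for the NB counters this event may use ... */

            return c;       /* safe: not stack memory */
    }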


> Signed-off-by: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
> LKML-Reference: <new-submission>
> ---
>  arch/x86/kernel/cpu/perf_event.c |   81 ++++++++++++++++++---------------------
>  1 file changed, 38 insertions(+), 43 deletions(-)
>
> Index: linux-2.6/arch/x86/kernel/cpu/perf_event.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/cpu/perf_event.c
> +++ linux-2.6/arch/x86/kernel/cpu/perf_event.c
> @@ -135,12 +138,14 @@ struct x86_pmu {
>         u64             intel_ctrl;
>         void            (*enable_bts)(u64 config);
>         void            (*disable_bts)(void);
> -       void            (*get_event_constraints)(struct cpu_hw_events *cpuc,
> -                                                struct perf_event *event,
> -                                                unsigned long *idxmsk);
> +
> +       struct event_constraint *
> +                       (*get_event_constraints)(struct cpu_hw_events *cpuc,
> +                                                struct perf_event *event);
> +
>         void            (*put_event_constraints)(struct cpu_hw_events *cpuc,
>                                                  struct perf_event *event);
> -       const struct event_constraint *event_constraints;
> +       struct event_constraint *event_constraints;
>  };
>
>  static struct x86_pmu x86_pmu __read_mostly;
> @@ -1244,18 +1249,16 @@ static inline int is_x86_event(struct pe
>  static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
>  {
>         int i, j , w, num;
> -       int weight, wmax;
> -       unsigned long *c;
> -       unsigned long constraints[X86_PMC_IDX_MAX][BITS_TO_LONGS(X86_PMC_IDX_MAX)];
> +       int weight, wmax;
> +       struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
>         unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
>         struct hw_perf_event *hwc;
>
>         bitmap_zero(used_mask, X86_PMC_IDX_MAX);
>
>         for (i = 0; i < n; i++) {
> -               x86_pmu.get_event_constraints(cpuc,
> -                                             cpuc->event_list[i],
> -                                             constraints[i]);
> +               constraints[i] =
> +                       x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
>         }
>
>         /*
> @@ -1270,7 +1273,7 @@ static int x86_schedule_events(struct cp
>                         break;
>
>                 /* constraint still honored */
> -               if (!test_bit(hwc->idx, c))
> +               if (!test_bit(hwc->idx, c->idxmsk))
>                         break;
>
>                 /* not already used */
> @@ -1323,11 +1326,11 @@ static int x86_schedule_events(struct cp
>                         c = constraints[i];
>                         hwc = &cpuc->event_list[i]->hw;
>
> -                       weight = bitmap_weight(c, X86_PMC_IDX_MAX);
> +                       weight = bitmap_weight(c->idxmsk, X86_PMC_IDX_MAX);
>                         if (weight != w)
>                                 continue;
>
> -                       for_each_bit(j, c, X86_PMC_IDX_MAX) {
> +                       for_each_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
>                                 if (!test_bit(j, used_mask))
>                                         break;
>                         }
> @@ -2165,11 +2167,13 @@ perf_event_nmi_handler(struct notifier_b
>         return NOTIFY_STOP;
>  }
>
> +static struct event_constraint unconstrained;
> +
>  static struct event_constraint bts_constraint =
>         EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
>
> -static int intel_special_constraints(struct perf_event *event,
> -                                    unsigned long *idxmsk)
> +static struct event_constraint *
> +intel_special_constraints(struct perf_event *event)
>  {
>         unsigned int hw_event;
>
> @@ -2179,46 +2183,34 @@ static int intel_special_constraints(str
>                     x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
>                    (event->hw.sample_period == 1))) {
>
> -               bitmap_copy((unsigned long *)idxmsk,
> -                           (unsigned long *)bts_constraint.idxmsk,
> -                           X86_PMC_IDX_MAX);
> -               return 1;
> +               return &bts_constraint;
>         }
> -       return 0;
> +       return NULL;
>  }
>
> -static void intel_get_event_constraints(struct cpu_hw_events *cpuc,
> -                                       struct perf_event *event,
> -                                       unsigned long *idxmsk)
> +static struct event_constraint *
> +intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
>  {
> -       const struct event_constraint *c;
> +       struct event_constraint *c;
>
> -       /*
> -        * cleanup bitmask
> -        */
> -       bitmap_zero(idxmsk, X86_PMC_IDX_MAX);
> -
> -       if (intel_special_constraints(event, idxmsk))
> -               return;
> +       c = intel_special_constraints(event);
> +       if (c)
> +               return c;
>
>         if (x86_pmu.event_constraints) {
>                 for_each_event_constraint(c, x86_pmu.event_constraints) {
> -                       if ((event->hw.config & c->cmask) == c->code) {
> -                               bitmap_copy(idxmsk, c->idxmsk, X86_PMC_IDX_MAX);
> -                               return;
> -                       }
> +                       if ((event->hw.config & c->cmask) == c->code)
> +                               return c;
>                 }
>         }
> -       /* no constraints, means supports all generic counters */
> -       bitmap_fill((unsigned long *)idxmsk, x86_pmu.num_events);
> +
> +       return &unconstrained;
>  }
>
> -static void amd_get_event_constraints(struct cpu_hw_events *cpuc,
> -                                     struct perf_event *event,
> -                                     unsigned long *idxmsk)
> +static struct event_constraint *
> +amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
>  {
> -       /* no constraints, means supports all generic counters */
> -       bitmap_fill(idxmsk, x86_pmu.num_events);
> +       return &unconstrained;
>  }
>
>  static int x86_event_sched_in(struct perf_event *event,
> @@ -2586,6 +2578,9 @@ void __init init_hw_perf_events(void)
>         perf_events_lapic_init();
>         register_die_notifier(&perf_event_nmi_notifier);
>
> +       unconstrained = (struct event_constraint)
> +               EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1, 0);
> +
>         pr_info("... version:                %d\n",     x86_pmu.version);
>         pr_info("... bit width:              %d\n",     x86_pmu.event_bits);
>         pr_info("... generic registers:      %d\n",     x86_pmu.num_events);
>
> --
>
>
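
For anyone skimming the diff, the calling-convention change boils down
to this (simplified, not the literal kernel code):

    /* before: caller supplies a bitmask buffer to be filled in */
    unsigned long mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
    x86_pmu.get_event_constraints(cpuc, event, mask);

    /* after: callee hands back a pointer to an existing constraint */
    struct event_constraint *c;
    c = x86_pmu.get_event_constraints(cpuc, event);
    if (!test_bit(hwc->idx, c->idxmsk))
            break;  /* mirrors the fast path in x86_schedule_events() */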



--
Stephane Eranian | EMEA Software Engineering
Google France | 38 avenue de l'Opéra | 75002 Paris
Tel : +33 (0) 1 42 68 53 00
This email may be confidential or privileged. If you received this
communication by mistake, please don't forward it to anyone else,
please erase all copies and attachments, and please let me know that
it went to the wrong person. Thanks