Re: [RFC][PATCH 5/6] perf: Extend the mmap control page with time(TSC) fields

From: Stephane Eranian
Date: Wed Dec 28 2011 - 12:55:13 EST


On Mon, Nov 21, 2011 at 2:51 PM, Peter Zijlstra <a.p.zijlstra@xxxxxxxxx> wrote:
> Extend the mmap control page with fields so that userspace can compute
> time deltas relative to the provided time fields.
>
> Currently only implemented for x86 with constant and nonstop TSC.
>

It is not obvious to me how one would use time_mult, time_shift, time_offset
+ TSC to complement time_enabled/time_running to compute the correct
scaling factor. The patch does not include any example. Would you mind
describing the method?

Thanks.


> Cc: Stephane Eranian <eranian@xxxxxxxxxx>
> Cc: Arun Sharma <asharma@xxxxxx>
> Signed-off-by: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
> ---
>  arch/x86/kernel/cpu/perf_event.c |   14 ++++++++++++++
>  include/linux/perf_event.h       |    4 +++-
>  kernel/events/core.c             |   21 ++++++++++++++-------
>  3 files changed, 31 insertions(+), 8 deletions(-)
> Index: linux-2.6/arch/x86/kernel/cpu/perf_event.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/cpu/perf_event.c
> +++ linux-2.6/arch/x86/kernel/cpu/perf_event.c
> @@ -32,6 +32,7 @@
>  #include <asm/compat.h>
>  #include <asm/smp.h>
>  #include <asm/alternative.h>
> +#include <asm/timer.h>
>
>  #include "perf_event.h"
>
> @@ -1621,6 +1622,19 @@ static struct pmu pmu = {
>         .event_idx      = x86_pmu_event_idx,
>  };
>
> +void perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now)
> +{
> +       if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
> +               return;
> +
> +       if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
> +               return;
> +
> +       userpg->time_mult = this_cpu_read(cyc2ns);
> +       userpg->time_shift = CYC2NS_SCALE_FACTOR;
> +       userpg->time_offset = this_cpu_read(cyc2ns_offset) - now;
> +}
> +
>  /*
>   * callchain support
>   */
> Index: linux-2.6/include/linux/perf_event.h
> ===================================================================
> --- linux-2.6.orig/include/linux/perf_event.h
> +++ linux-2.6/include/linux/perf_event.h
> @@ -290,12 +290,14 @@ struct perf_event_mmap_page {
>         __s64   offset;                 /* add to hardware event value */
>         __u64   time_enabled;           /* time event active */
>         __u64   time_running;           /* time event on cpu */
> +       __u32   time_mult, time_shift;
> +       __u64   time_offset;
>
>                 /*
>                  * Hole for extension of the self monitor capabilities
>                  */
>
> -       __u64   __reserved[123];        /* align to 1k */
> +       __u64   __reserved[121];        /* align to 1k */
>
>         /*
>          * Control data for the mmap() data buffer.
> Index: linux-2.6/kernel/events/core.c
> ===================================================================
> --- linux-2.6.orig/kernel/events/core.c
> +++ linux-2.6/kernel/events/core.c
> @@ -3203,17 +3203,22 @@ static int perf_event_index(struct perf_
>  }
>
>  static void calc_timer_values(struct perf_event *event,
> +                               u64 *now,
>                                 u64 *enabled,
>                                 u64 *running)
>  {
> -       u64 now, ctx_time;
> +       u64 ctx_time;
>
> -       now = perf_clock();
> -       ctx_time = event->shadow_ctx_time + now;
> +       *now = perf_clock();
> +       ctx_time = event->shadow_ctx_time + *now;
>         *enabled = ctx_time - event->tstamp_enabled;
>         *running = ctx_time - event->tstamp_running;
>  }
>
> +void __weak perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now)
> +{
> +}
> +
>  /*
>   * Callers need to ensure there can be no nesting of this function, otherwise
>   * the seqlock logic goes bad. We can not serialize this because the arch
> @@ -3223,7 +3228,7 @@ void perf_event_update_userpage(struct p
>  {
>         struct perf_event_mmap_page *userpg;
>         struct ring_buffer *rb;
> -       u64 enabled, running;
> +       u64 enabled, running, now;
>
>         rcu_read_lock();
>         /*
> @@ -3235,7 +3240,7 @@ void perf_event_update_userpage(struct p
>          * because of locking issue as we can be called in
>          * NMI context
>          */
> -       calc_timer_values(event, &enabled, &running);
> +       calc_timer_values(event, &now, &enabled, &running);
>         rb = rcu_dereference(event->rb);
>         if (!rb)
>                 goto unlock;
> @@ -3260,6 +3265,8 @@ void perf_event_update_userpage(struct p
>         userpg->time_running = running +
>                         atomic64_read(&event->child_total_time_running);
>
> +       perf_update_user_clock(userpg, now);
> +
>         barrier();
>         ++userpg->lock;
>         preempt_enable();
> @@ -3692,7 +3699,7 @@ static void perf_output_read_group(struc
>  static void perf_output_read(struct perf_output_handle *handle,
>                               struct perf_event *event)
>  {
> -       u64 enabled = 0, running = 0;
> +       u64 enabled = 0, running = 0, now;
>         u64 read_format = event->attr.read_format;
>
>         /*
> @@ -3705,7 +3712,7 @@ static void perf_output_read(struct perf
>          * NMI context
>          */
>         if (read_format & PERF_FORMAT_TOTAL_TIMES)
> -               calc_timer_values(event, &enabled, &running);
> +               calc_timer_values(event, &now, &enabled, &running);
>
>         if (event->attr.read_format & PERF_FORMAT_GROUP)
>                 perf_output_read_group(handle, event, enabled, running);
>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/