[RFC PATCH V2 4/9] perf/x86: Enable post-processing monotonic raw conversion

From: Kan Liang
Date: Mon Feb 13 2023 - 14:08:17 EST


From: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>

On X86, the raw HW time is read from the TSC. Once the new perf tool sets
hw_time together with the monotonic raw clock, preload the raw HW time for
each sample. Also, dump the conversion information into the mmap_page.

For a legacy perf tool that doesn't know about hw_time, nothing changes.

Move x86_pmu_sample_preload() before setup_pebs_time(), so that the
preloaded rdtsc() value can be overridden by the more precise TSC from a
PEBS record.

Signed-off-by: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>
Cc: Ravi Bangoria <ravi.bangoria@xxxxxxx>
---
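[Not part of the patch] For reviewers, a minimal sketch of how the new perf
tool side is expected to open such an event. It assumes the hw_time attr bit
added earlier in this series (so it only builds against the updated uapi
headers); the config, sample period, and error handling are illustrative
only:

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <time.h>
#include <unistd.h>

static int open_hw_time_event(void)
{
	struct perf_event_attr attr = {
		.type		= PERF_TYPE_HARDWARE,
		.config		= PERF_COUNT_HW_CPU_CYCLES,
		.size		= sizeof(attr),
		.sample_period	= 100000,
		.sample_type	= PERF_SAMPLE_TIME,
		.use_clockid	= 1,			/* required by perf_event_hw_time() */
		.clockid	= CLOCK_MONOTONIC_RAW,	/* the only supported clock here */
		.hw_time	= 1,			/* new bit: record raw TSC, convert later */
	};

	/* current task, any CPU */
	return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
}
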
 arch/x86/events/core.c       | 10 ++++++++++
 arch/x86/events/intel/ds.c   | 14 +++++++++++---
 arch/x86/events/perf_event.h | 12 ++++++++++++
 3 files changed, 33 insertions(+), 3 deletions(-)
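
[Not part of the patch] Likewise, a sketch of the post-processing conversion
a consumer could apply to a sampled raw TSC, using the time_mono_* fields
exported below. It assumes the mult/shift arithmetic of the kernel's fast
timekeeper (see __ktime_get_fast_ns()); the cap_user_time_mono_raw check and
the seq/lock retry loop around reading the fields are omitted, and
tsc_to_mono_raw() is just an illustrative name:

#include <linux/perf_event.h>
#include <linux/types.h>

/*
 * Mirror the kernel's mult/shift conversion: nanoseconds since the last
 * timekeeper update, scaled from TSC cycles, added to the monotonic raw
 * base. delta * mult must not overflow 64 bits, which holds as long as
 * the exported conversion data is reasonably fresh.
 */
static __u64 tsc_to_mono_raw(const struct perf_event_mmap_page *pc, __u64 tsc)
{
	__u64 delta = tsc - pc->time_mono_last;

	return pc->time_mono_base +
	       ((delta * pc->time_mono_mult + pc->time_mono_nsec) >>
		pc->time_mono_shift);
}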

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index b19ac54ebeea..7c1dfb8c763d 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2740,6 +2740,16 @@ void arch_perf_update_userpage(struct perf_event *event,
 	if (!event->attr.use_clockid) {
 		userpg->cap_user_time_zero = 1;
 		userpg->time_zero = offset;
+	} else if (perf_event_hw_time(event)) {
+		struct ktime_conv mono;
+
+		userpg->cap_user_time_mono_raw = 1;
+		ktime_get_fast_mono_raw_conv(&mono);
+		userpg->time_mono_last = mono.cycle_last;
+		userpg->time_mono_mult = mono.mult;
+		userpg->time_mono_shift = mono.shift;
+		userpg->time_mono_nsec = mono.xtime_nsec;
+		userpg->time_mono_base = mono.base;
 	}
 
 	cyc2ns_read_end();
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 2f59573ed463..10d4b63c891f 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1574,6 +1574,12 @@ static void setup_pebs_time(struct perf_event *event,
 			    struct perf_sample_data *data,
 			    u64 tsc)
 {
+	u64 time = tsc;
+
+	/* Perf tool does the conversion. No conversion here. */
+	if (perf_event_hw_time(event))
+		goto done;
+
 	/* Converting to a user-defined clock is not supported yet. */
 	if (event->attr.use_clockid != 0)
 		return;
@@ -1588,7 +1594,9 @@ static void setup_pebs_time(struct perf_event *event,
 	if (!using_native_sched_clock() || !sched_clock_stable())
 		return;
 
-	data->time = native_sched_clock_from_tsc(tsc) + __sched_clock_offset;
+	time = native_sched_clock_from_tsc(tsc) + __sched_clock_offset;
+done:
+	data->time = time;
 	data->sample_flags |= PERF_SAMPLE_TIME;
 }

@@ -1733,6 +1741,8 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
 		}
 	}
 
+	x86_pmu_sample_preload(data, event, cpuc);
+
 	/*
 	 * v3 supplies an accurate time stamp, so we use that
 	 * for the time stamp.
@@ -1741,8 +1751,6 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
 	 */
 	if (x86_pmu.intel_cap.pebs_format >= 3)
 		setup_pebs_time(event, data, pebs->tsc);
-
-	x86_pmu_sample_preload(data, event, cpuc);
 }
 
 static void adaptive_pebs_save_regs(struct pt_regs *regs,
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index ae6ec58fde14..0486ee6a7605 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -1185,12 +1185,24 @@ int x86_pmu_handle_irq(struct pt_regs *regs);
 void x86_pmu_show_pmu_cap(int num_counters, int num_counters_fixed,
 			  u64 intel_ctrl);
 
+static inline bool perf_event_hw_time(struct perf_event *event)
+{
+	return (event->attr.hw_time &&
+		event->attr.use_clockid &&
+		(event->attr.clockid == CLOCK_MONOTONIC_RAW));
+}
+
 static inline void x86_pmu_sample_preload(struct perf_sample_data *data,
 					  struct perf_event *event,
 					  struct cpu_hw_events *cpuc)
 {
 	if (has_branch_stack(event))
 		perf_sample_save_brstack(data, event, &cpuc->lbr_stack);
+
+	if (perf_event_hw_time(event)) {
+		data->time = rdtsc();
+		data->sample_flags |= PERF_SAMPLE_TIME;
+	}
 }
 
 extern struct event_constraint emptyconstraint;
--
2.35.1