Re: [PATCH v8 6/6] x86/tsc: use tsc early

From: Dou Liyang
Date: Sun Nov 12 2017 - 22:47:58 EST


Hi Pavel,

At 11/09/2017 11:02 AM, Pavel Tatashin wrote:
This patch adds early clock feature to x86 platforms.

tsc_early_init():
Determines offset, shift and multiplier for the early clock based on the
TSC frequency.

tsc_early_fini()
Implement the finish part of early tsc feature, prints message about the
offset, which can be useful to find out how much time was spent in post and
boot manager (if TSC starts from 0 during boot)

sched_clock_early():
TSC based implementation of early clock and is called from sched_clock().

Call tsc_early_init() to initialize early boot time stamps functionality on
the supported x86 platforms, and call tsc_early_fini() to finish this
feature after permanent clock has been initialized. The supported x86
systems are those where TSC frequency is determined early in boot.

Signed-off-by: Pavel Tatashin <pasha.tatashin@xxxxxxxxxx>
---
arch/x86/include/asm/paravirt.h | 2 +-
arch/x86/kernel/tsc.c | 85 +++++++++++++++++++++++++++++++++++++++++
2 files changed, 86 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 283efcaac8af..b4ba220163ce 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -171,7 +171,7 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)

static inline unsigned long long paravirt_sched_clock(void)
{
- return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock);
+ return PVOP_CALL0(unsigned long long, pv_time_ops.active_sched_clock);
}


Should in the 5th patch

struct static_key;
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index dbce6fa32aa9..24da1ff96481 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -181,6 +181,80 @@ static void set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long tsc_
local_irq_restore(flags);
}

+#ifdef CONFIG_X86_TSC
+static struct cyc2ns_data cyc2ns_early;
+
+static u64 sched_clock_early(void)
+{
+ u64 ns = mul_u64_u32_shr(rdtsc(), cyc2ns_early.cyc2ns_mul,
+ cyc2ns_early.cyc2ns_shift);
+ return ns + cyc2ns_early.cyc2ns_offset;
+}
+
+#ifdef CONFIG_PARAVIRT
+static inline void __init tsc_early_enable(void)
+{
+ pv_time_ops.active_sched_clock = sched_clock_early;
+}
+
+static inline void __init tsc_early_disable(void)
+{
+ pv_time_ops.active_sched_clock = pv_time_ops.sched_clock;
+}
+#else /* CONFIG_PARAVIRT */
+/*
+ * For native clock we use two switches static and dynamic, the static switch is
+ * initially true, so we check the dynamic switch, which is initially false.
+ * Later when early clock is disabled, we can alter the static switch in order
+ * to avoid branch check on every sched_clock() call.
+ */
+static bool __tsc_early;
+static DEFINE_STATIC_KEY_TRUE(__tsc_early_static);
+
+static inline void __init tsc_early_enable(void)
+{
+ __tsc_early = true;
+}
+
+static inline void __init tsc_early_disable(void)
+{
+ __tsc_early = false;
+ static_branch_disable(&__tsc_early_static);
+}
+#endif /* CONFIG_PARAVIRT */
+
+/*
+ * Initialize clock for early time stamps
+ */
+static void __init tsc_early_init(unsigned int khz)
+{
+ clocks_calc_mult_shift(&cyc2ns_early.cyc2ns_mul,
+ &cyc2ns_early.cyc2ns_shift,
+ khz, NSEC_PER_MSEC, 0);
+ cyc2ns_early.cyc2ns_offset = -sched_clock_early();
+ tsc_early_enable();
+}
+
+static void __init tsc_early_fini(void)
+{
+ unsigned long long t;
+ unsigned long r;
+
+ /* We did not have early sched clock if multiplier is 0 */
+ if (cyc2ns_early.cyc2ns_mul == 0) {
+ tsc_early_disable();
+ return;
+ }
+
+ t = -cyc2ns_early.cyc2ns_offset;
+ r = do_div(t, NSEC_PER_SEC);
+
+ tsc_early_disable();
+ __sched_clock_offset = sched_clock_early() - sched_clock();
+ pr_info("sched clock early is finished, offset [%lld.%09lds]\n", t, r);
+}

Add definitions for the situation of X86_TSC = no :

#else /* CONFIG_X86_TSC */
static inline void tsc_early_init(unsigned int khz) { }
static inline void tsc_early_fini(void) { }

+#endif /* CONFIG_X86_TSC */
+


/*
* Scheduler clock - returns current time in nanosec units.
*/
@@ -193,6 +267,13 @@ u64 native_sched_clock(void)
return cycles_2_ns(tsc_now);
}

+#if !defined(CONFIG_PARAVIRT) && defined(CONFIG_X86_TSC)
+ if (static_branch_unlikely(&__tsc_early_static)) {
+ if (__tsc_early)
+ return sched_clock_early();
+ }
+#endif /* !CONFIG_PARAVIRT && CONFIG_X86_TSC */
+
/*
* Fall back to jiffies if there's no TSC available:
* ( But note that we still use it if the TSC is marked
@@ -1274,6 +1355,7 @@ void __init tsc_early_delay_calibrate(void)
lpj = tsc_khz * 1000;
do_div(lpj, HZ);
loops_per_jiffy = lpj;
+ tsc_early_init(tsc_khz);
}

void __init tsc_init(void)
@@ -1283,6 +1365,7 @@ void __init tsc_init(void)

if (!boot_cpu_has(X86_FEATURE_TSC)) {
setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
+ tsc_early_fini();

According to tsc_early_delay_calibrate(), if (!boot_cpu_has(X86_FEATURE_TSC || !tsc_khz ), tsc_early_init(tsc_khz)
will never be called, so here is redundant.

return;
}

@@ -1302,6 +1385,7 @@ void __init tsc_init(void)
if (!tsc_khz) {
mark_tsc_unstable("could not calculate TSC khz");
setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
+ tsc_early_fini();

ditto

return;
}

@@ -1341,6 +1425,7 @@ void __init tsc_init(void)
mark_tsc_unstable("TSCs unsynchronized");

detect_art();
+ tsc_early_fini();

IMO, Just keep the finishing call here is enough.

BTW, seems you forgot to cc Peter Zijlstra <peterz@xxxxxxxxxxxxx> in both V7 and V8 patchsets.

Thanks,
dou
}

#ifdef CONFIG_SMP