Re: [PATCH v6 1/4] sched/clock: interface to allow timestamps early in boot

From: Peter Zijlstra
Date: Wed Sep 27 2017 - 08:59:28 EST


On Wed, Aug 30, 2017 at 02:03:22PM -0400, Pavel Tatashin wrote:
> In Linux printk() can output timestamps next to every line. This is very
> useful for tracking regressions, and finding places that can be optimized.
> However, the timestamps are available only later in boot. On smaller
> machines it is insignificant amount of time, but on larger it can be many
> seconds or even minutes into the boot process.
>
> This patch adds an interface for platforms with unstable sched clock to
> show timestamps early in boot. In order to get this functionality a
> platform must:
>
> - Implement u64 sched_clock_early()
> Clock that returns monotonic time
>
> - Call sched_clock_early_init()
> Tells sched clock that the early clock can be used
>
> - Call sched_clock_early_fini()
> Tells sched clock that the early clock is finished, and sched clock
> should hand over the operation to permanent clock.
>
> Signed-off-by: Pavel Tatashin <pasha.tatashin@xxxxxxxxxx>

Urgh, that's horrific.

Can't we simply make sched_clock() go earlier? (we're violating "notsc"
in any case and really should kill that option).

Then we can do something like so on top...


---
include/linux/sched/clock.h | 6 +++++-
kernel/sched/clock.c | 42 +++++++++++++++++++++++++++---------------
2 files changed, 32 insertions(+), 16 deletions(-)

diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h
index a55600ffdf4b..986d14a208e7 100644
--- a/include/linux/sched/clock.h
+++ b/include/linux/sched/clock.h
@@ -20,9 +20,12 @@ extern u64 running_clock(void);
extern u64 sched_clock_cpu(int cpu);


-extern void sched_clock_init(void);

#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+static inline void sched_clock_init(void)
+{
+}
+
static inline void sched_clock_tick(void)
{
}
@@ -49,6 +52,7 @@ static inline u64 local_clock(void)
return sched_clock();
}
#else
+extern void sched_clock_init(void);
extern int sched_clock_stable(void);
extern void clear_sched_clock_stable(void);

diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index ca0f8fc945c6..47d13d37f2f1 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -80,11 +80,6 @@ EXPORT_SYMBOL_GPL(sched_clock);

__read_mostly int sched_clock_running;

-void sched_clock_init(void)
-{
- sched_clock_running = 1;
-}
-
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
/*
* We must start with !__sched_clock_stable because the unstable -> stable
@@ -211,6 +206,31 @@ void clear_sched_clock_stable(void)
__clear_sched_clock_stable();
}

+static void __sched_clock_gtod_offset(void)
+{
+ u64 gtod, clock;
+
+ local_irq_disable();
+ gtod = ktime_get_ns();
+ clock = sched_clock();
+ __gtod_offset = (clock + __sched_clock_offset) - gtod;
+ local_irq_enable();
+}
+
+void sched_clock_init(void)
+{
+ /*
+ * Set __gtod_offset such that once we mark sched_clock_running,
+ * sched_clock_tick() continues where sched_clock() left off.
+ *
+ * Even if TSC is buggered, we're still UP at this point so it
+ * can't really be out of sync.
+ */
+ __sched_clock_gtod_offset();
+ barrier();
+ sched_clock_running = 1;
+}
+
/*
* We run this as late_initcall() such that it runs after all built-in drivers,
* notably: acpi_processor and intel_idle, which can mark the TSC as unstable.
@@ -363,7 +383,7 @@ u64 sched_clock_cpu(int cpu)
return sched_clock() + __sched_clock_offset;

if (unlikely(!sched_clock_running))
- return 0ull;
+ return sched_clock();

preempt_disable_notrace();
scd = cpu_sdc(cpu);
@@ -397,7 +417,6 @@ void sched_clock_tick(void)

void sched_clock_tick_stable(void)
{
- u64 gtod, clock;

if (!sched_clock_stable())
return;
@@ -409,11 +428,7 @@ void sched_clock_tick_stable(void)
* good moment to update our __gtod_offset. Because once we find the
* TSC to be unstable, any computation will be computing crap.
*/
- local_irq_disable();
- gtod = ktime_get_ns();
- clock = sched_clock();
- __gtod_offset = (clock + __sched_clock_offset) - gtod;
- local_irq_enable();
+ __sched_clock_gtod_offset();
}

/*
@@ -448,9 +463,6 @@ EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);

u64 sched_clock_cpu(int cpu)
{
- if (unlikely(!sched_clock_running))
- return 0;
-
return sched_clock();
}