Re: [RFC 1/2] x86/tsc: add a timer to make sure tsc_adjust is always checked

From: Feng Tang
Date: Sun Apr 11 2021 - 03:21:34 EST


On Sat, Apr 10, 2021 at 08:46:38PM +0200, Thomas Gleixner wrote:
> Feng,
>
> On Sat, Apr 10 2021 at 22:38, Feng Tang wrote:
> > On Sat, Apr 10, 2021 at 11:27:11AM +0200, Thomas Gleixner wrote:
> >> > +static int __init start_sync_check_timer(void)
> >> > +{
> >> > + if (!boot_cpu_has(X86_FEATURE_TSC_ADJUST))
> >> > + return 0;
> >> > +
> >> > + timer_setup(&tsc_sync_check_timer, tsc_sync_check_timer_fn, 0);
> >> > + tsc_sync_check_timer.expires = jiffies + SYNC_CHECK_INTERVAL;
> >> > + add_timer(&tsc_sync_check_timer);
> >> > +
> >> > + return 0;
> >> > +}
> >> > +late_initcall(start_sync_check_timer);
> >>
> >> So right now, if someone adds 'tsc=reliable' on the kernel command line
> >> then all of the watchdog checking, except for the idle enter TSC_ADJUST
> >> check is disabled. The NOHZ full people are probably going to be pretty
> >> unhappy about yet another unconditional timer they have to chase down.
> >>
> >> So this needs some more thought.
> >
> > 'tsc=reliable' in cmdline will set 'tsc_clocksource_reliable' to 1, so
> > we can skip starting this timer if 'tsc_clocksource_reliable==1' ?
>
> Then we can just ignore that patch altogether because of 2/2 doing:
>
> + if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) &&
> + boot_cpu_has(X86_FEATURE_NONSTOP_TSC) &&
> + boot_cpu_has(X86_FEATURE_TSC_ADJUST) &&
> + nr_online_nodes <= 2)
> + tsc_clocksource_reliable = 1;
>
> ....
>
> I said for a reason:

Sorry, I missed that part and should have put more thought into it;
this is much trickier than I expected.

In the very first patch I set 'tsc_clocksource_reliable' to 1 to try to
reuse the logic that clears the CLOCK_SOURCE_MUST_VERIFY bit, and now we
may need to decouple the two.

One thing I can think of now is something like the below:

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index f70dffc..bfd013b 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1177,6 +1177,12 @@ void mark_tsc_unstable(char *reason)

EXPORT_SYMBOL_GPL(mark_tsc_unstable);

+static void __init tsc_skip_watchdog_verify(void)
+{
+ clocksource_tsc_early.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
+ clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
+}
+
static void __init check_system_tsc_reliable(void)
{
#if defined(CONFIG_MGEODEGX1) || defined(CONFIG_MGEODE_LX) || defined(CONFIG_X86_GENERIC)
@@ -1193,6 +1199,17 @@ static void __init check_system_tsc_reliable(void)
#endif
if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE))
tsc_clocksource_reliable = 1;
+
+ /*
+ * Ideally the number of sockets should be checked, but this is called
+ * by tsc_init(), which runs in the early boot phase when the socket
+ * count may not be available yet. Use 'nr_online_nodes' as a fallback
+ * instead.
+ if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) &&
+ boot_cpu_has(X86_FEATURE_NONSTOP_TSC) &&
+ boot_cpu_has(X86_FEATURE_TSC_ADJUST) &&
+ nr_online_nodes <= 2)
+ tsc_skip_watchdog_verify();
}

/*
@@ -1384,9 +1401,6 @@ static int __init init_tsc_clocksource(void)
if (tsc_unstable)
goto unreg;

- if (tsc_clocksource_reliable || no_tsc_watchdog)
- clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
-
if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3))
clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP;

@@ -1524,7 +1538,7 @@ void __init tsc_init(void)
}

if (tsc_clocksource_reliable || no_tsc_watchdog)
- clocksource_tsc_early.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
+ tsc_skip_watchdog_verify();

clocksource_register_khz(&clocksource_tsc_early, tsc_khz);
detect_art();

Thanks,
Feng

> >> So this needs some more thought.
>
> Thanks,
>
> tglx