[patch V2 03/38] x86/smpboot: Avoid pointless delay calibration if TSC is synchronized
From: Thomas Gleixner
Date: Thu May 04 2023 - 15:02:18 EST
From: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
When TSC is synchronized across sockets then there is no reason to
calibrate the delay for the first CPU which comes up on a socket.
Just reuse the existing calibration value.
This removes 100ms pointlessly wasted time from CPU hotplug per socket.
Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
---
arch/x86/kernel/smpboot.c | 38 ++++++++++++++++++++++++--------------
arch/x86/kernel/tsc.c | 20 ++++++++++++++++----
2 files changed, 40 insertions(+), 18 deletions(-)
---
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -178,10 +178,7 @@ static void smp_callin(void)
*/
apic_ap_setup();
- /*
- * Save our processor parameters. Note: this information
- * is needed for clock calibration.
- */
+ /* Save our processor parameters. */
smp_store_cpu_info(cpuid);
/*
@@ -192,14 +189,6 @@ static void smp_callin(void)
ap_init_aperfmperf();
- /*
- * Get our bogomips.
- * Update loops_per_jiffy in cpu_data. Previous call to
- * smp_store_cpu_info() stored a value that is close but not as
- * accurate as the value just calculated.
- */
- calibrate_delay();
- cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy;
pr_debug("Stack at about %p\n", &cpuid);
wmb();
@@ -212,8 +201,24 @@ static void smp_callin(void)
cpumask_set_cpu(cpuid, cpu_callin_mask);
}
+static void ap_calibrate_delay(void)
+{
+ /*
+ * Calibrate the delay loop and update loops_per_jiffy in cpu_data.
+ * smp_store_cpu_info() stored a value that is close but not as
+ * accurate as the value just calculated.
+ *
+ * As this is invoked after the TSC synchronization check,
+ * calibrate_delay_is_known() will skip the calibration routine
+ * when TSC is synchronized across sockets.
+ */
+ calibrate_delay();
+ cpu_data(smp_processor_id()).loops_per_jiffy = loops_per_jiffy;
+}
+
static int cpu0_logical_apicid;
static int enable_start_cpu0;
+
/*
* Activate a secondary processor.
*/
@@ -240,10 +245,15 @@ static void notrace start_secondary(void
/* otherwise gcc will move up smp_processor_id before the cpu_init */
barrier();
+ /* Check TSC synchronization with the control CPU: */
+ check_tsc_sync_target();
+
/*
- * Check TSC synchronization with the boot CPU:
+ * Calibrate the delay loop after the TSC synchronization check.
+ * This allows to skip the calibration when TSC is synchronized
+ * across sockets.
*/
- check_tsc_sync_target();
+ ap_calibrate_delay();
speculative_store_bypass_ht_init();
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1598,10 +1598,7 @@ void __init tsc_init(void)
#ifdef CONFIG_SMP
/*
- * If we have a constant TSC and are using the TSC for the delay loop,
- * we can skip clock calibration if another cpu in the same socket has already
- * been calibrated. This assumes that CONSTANT_TSC applies to all
- * cpus in the socket - this should be a safe assumption.
+ * Check whether existing calibration data can be reused.
*/
unsigned long calibrate_delay_is_known(void)
{
@@ -1609,6 +1606,21 @@ unsigned long calibrate_delay_is_known(v
int constant_tsc = cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC);
const struct cpumask *mask = topology_core_cpumask(cpu);
+ /*
+ * If TSC has constant frequency and TSC is synchronized across
+ * sockets then reuse CPU0 calibration.
+ */
+ if (constant_tsc && !tsc_unstable)
+ return cpu_data(0).loops_per_jiffy;
+
+ /*
+ * If TSC has constant frequency and TSC is not synchronized across
+ * sockets and this is not the first CPU in the socket, then reuse
+ * the calibration value of an already online CPU on that socket.
+ *
+ * This assumes that CONSTANT_TSC is consistent for all CPUs in a
+ * socket.
+ */
if (!constant_tsc || !mask)
return 0;