[RFC PATCH v2 02/12] smp: unify crash_ and smp_send_stop() logic

From: Cristian Marussi
Date: Fri Sep 13 2019 - 14:20:53 EST


crash_smp_send_stop() logic was fairly similar to smp_send_stop():
a lot of logic and code was duplicated between the two code paths and
across a few different architectures.

Unify this underlying common logic into the existing SMP common stop
code: use a common workhorse function for both paths to perform the
common tasks while taking care to propagate to the underlying
architecture code the intent of the stop operation: a simple stop or
a crash dump stop.

Relocate the __weak crash_smp_send_stop() function from panic.c to smp.c,
since it is the crash dump entry point for the crash stop process and now
calls into this new common logic (only if this latter is enabled by
ARCH_USE_COMMON_SMP_STOP=y).

Introduce a few more helpers so that the architectures willing to use
the common code logic can provide their arch-specific bits to handle
the differences between a stop and a crash stop; architectures can
anyway decide to override as a whole the common logic providing their
own custom solution in crash_smp_send_stop() (as it was before).

Provide also a new common code method to inquiry on the outcome of an
ongoing crash_stop procedure: smp_crash_stop_failed().

Signed-off-by: Cristian Marussi <cristian.marussi@xxxxxxx>
---
v1 --> v2
- using new CONFIG_USE_COMMON_SMP_STOP
- added arch_smp_cpus_crash_complete()
---
include/linux/smp.h | 34 +++++++++++++++++++++++
kernel/panic.c | 26 -----------------
kernel/smp.c | 68 ++++++++++++++++++++++++++++++++++++++++++---
3 files changed, 98 insertions(+), 30 deletions(-)

diff --git a/include/linux/smp.h b/include/linux/smp.h
index 381a14bfcd96..0f95bbd2cb5c 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -130,8 +130,36 @@ extern void arch_smp_stop_call(cpumask_t *cpus);
* the local CPU once the other CPUs have been successfully stopped.
*/
void arch_smp_cpus_stop_complete(void);
+
+/*
+ * An Architecture CAN also provide the arch_smp_cpus_crash_complete()
+ * dedicated helper, to perform any final arch-specific operation on
+ * the local CPU once the other CPUs have been successfully crash stopped.
+ * When not overridden by the user, this defaults to call straight away
+ * arch_smp_cpus_stop_complete()
+ */
+void arch_smp_cpus_crash_complete(void);
+
+/*
+ * An Architecture CAN additionally provide the arch_smp_crash_call()
+ * helper which implements the arch-specific crash dump related operations.
+ *
+ * If such arch wants to fully support crash dump, this MUST be provided;
+ * when not provided the crash dump procedure will fallback to behave like
+ * a normal stop. (no saved regs, no arch-specific features disabled)
+ */
+extern void arch_smp_crash_call(cpumask_t *cpus);
+
+/* Helper to query the outcome of an ongoing crash_stop operation */
+bool smp_crash_stop_failed(void);
#endif

+/*
+ * stops all CPUs but the current one propagating to all other CPUs
+ * the information that a crash_kexec is ongoing:
+ */
+void crash_smp_send_stop(void);
+
/*
* sends a 'reschedule' event to another CPU:
*/
@@ -195,6 +223,12 @@ static inline int get_boot_cpu_id(void)

static inline void smp_send_stop(void) { }

+static inline void crash_smp_send_stop(void) { }
+
+#ifdef CONFIG_USE_COMMON_SMP_STOP
+static inline bool smp_crash_stop_failed(void) { }
+#endif
+
/*
* These macros fold the SMP functionality into a single CPU system
*/
diff --git a/kernel/panic.c b/kernel/panic.c
index 057540b6eee9..bc0dbf9c9b75 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -86,32 +86,6 @@ void __weak nmi_panic_self_stop(struct pt_regs *regs)
panic_smp_self_stop();
}

-/*
- * Stop other CPUs in panic. Architecture dependent code may override this
- * with more suitable version. For example, if the architecture supports
- * crash dump, it should save registers of each stopped CPU and disable
- * per-CPU features such as virtualization extensions.
- */
-void __weak crash_smp_send_stop(void)
-{
- static int cpus_stopped;
-
- /*
- * This function can be called twice in panic path, but obviously
- * we execute this only once.
- */
- if (cpus_stopped)
- return;
-
- /*
- * Note smp_send_stop is the usual smp shutdown function, which
- * unfortunately means it may not be hardened to work in a panic
- * situation.
- */
- smp_send_stop();
- cpus_stopped = 1;
-}
-
atomic_t panic_cpu = ATOMIC_INIT(PANIC_CPU_INVALID);

/*
diff --git a/kernel/smp.c b/kernel/smp.c
index 72f99bf13fd0..b05d2648a168 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -820,8 +820,14 @@ int smp_call_on_cpu(unsigned int cpu, int (*func)(void *), void *par, bool phys)
EXPORT_SYMBOL_GPL(smp_call_on_cpu);

#ifdef CONFIG_USE_COMMON_SMP_STOP
+
void __weak arch_smp_cpus_stop_complete(void) { }

+void __weak arch_smp_cpus_crash_complete(void)
+{
+ arch_smp_cpus_stop_complete();
+}
+
static inline bool any_other_cpus_online(cpumask_t *mask,
unsigned int this_cpu_id)
{
@@ -831,6 +837,12 @@ static inline bool any_other_cpus_online(cpumask_t *mask,
return !cpumask_empty(mask);
}

+void __weak arch_smp_crash_call(cpumask_t *cpus)
+{
+ pr_debug("SMP: Using generic %s() as SMP crash call.\n", __func__);
+ arch_smp_stop_call(cpus);
+}
+
/*
* This centralizes the common logic to:
*
@@ -842,7 +854,7 @@ static inline bool any_other_cpus_online(cpumask_t *mask,
* - wait for the stop operation to be completed across all involved CPUs
* monitoring the cpu_online_mask
*/
-void smp_send_stop(void)
+static inline void __smp_send_stop_all(bool reason_crash)
{
unsigned int this_cpu_id;
cpumask_t mask;
@@ -855,8 +867,11 @@ void smp_send_stop(void)

if (system_state <= SYSTEM_RUNNING)
pr_crit("stopping secondary CPUs\n");
- arch_smp_stop_call(&mask);
-
+ /* smp and crash arch-backends helpers are kept distinct */
+ if (!reason_crash)
+ arch_smp_stop_call(&mask);
+ else
+ arch_smp_crash_call(&mask);
/*
* Defaults to wait up to one second for other CPUs to stop;
* architectures can modify the default timeout or request
@@ -881,6 +896,51 @@ void smp_send_stop(void)
cpumask_pr_args(cpu_online_mask));
}
/* Perform final (possibly arch-specific) work on this CPU */
- arch_smp_cpus_stop_complete();
+ if (!reason_crash)
+ arch_smp_cpus_stop_complete();
+ else
+ arch_smp_cpus_crash_complete();
+}
+
+void smp_send_stop(void)
+{
+ __smp_send_stop_all(false);
}
+
+bool __weak smp_crash_stop_failed(void)
+{
+ return (num_online_cpus() > cpu_online(smp_processor_id()));
+}
+#endif
+
+/*
+ * Stop other CPUs while passing down the additional information that a
+ * crash_kexec is ongoing: it's up to the architecture implementation
+ * decide what to do.
+ *
+ * For example, Architectures supporting crash dump should provide
+ * specialized support for saving registers and disabling per-CPU features
+ * like virtualization extensions.
+ *
+ * Behaviour in the CONFIG_USE_COMMON_SMP_STOP=n case is preserved
+ * as it was before.
+ */
+void __weak crash_smp_send_stop(void)
+{
+ static int cpus_stopped;
+
+ /*
+ * This function can be called twice in panic path, but obviously
+ * we execute this only once.
+ */
+ if (cpus_stopped)
+ return;
+
+#ifdef CONFIG_USE_COMMON_SMP_STOP
+ __smp_send_stop_all(true);
+#else
+ smp_send_stop();
#endif
+
+ cpus_stopped = 1;
+}
--
2.17.1