[PATCH] cpu/hotplug: Do not bail-out during DYING/STARTING sections

From: Vincent Donnefort
Date: Thu May 19 2022 - 11:44:15 EST


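The DYING/STARTING callbacks are not expected to fail. However, callbacks
registered in those sections can still report errors. Until now, the first
error made cpuhp_invoke_callback_range() stop iterating, so the remaining
callbacks of the section were never invoked. There's nothing the hotplug
machinery can do to recover at that point, so let's just log the failures
and proceed with the remaining callbacks.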

Fixes: 453e41085183 ("cpu/hotplug: Add cpuhp_invoke_callback_range()")
Reported-by: Derek Dolney <z23@xxxxxxxxxx>
Signed-off-by: Vincent Donnefort <vdonnefort@xxxxxxxxxx>

diff --git a/kernel/cpu.c b/kernel/cpu.c
index bbad5e375d3b..b23a83d869a9 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -663,21 +663,52 @@ static bool cpuhp_next_state(bool bringup,
return true;
}

-static int cpuhp_invoke_callback_range(bool bringup,
- unsigned int cpu,
- struct cpuhp_cpu_state *st,
- enum cpuhp_state target)
+static int _cpuhp_invoke_callback_range(bool bringup,
+ unsigned int cpu,
+ struct cpuhp_cpu_state *st,
+ enum cpuhp_state target,
+ bool no_fail)
{
enum cpuhp_state state;
- int err = 0;
+ int ret = 0;

while (cpuhp_next_state(bringup, &state, st, target)) {
+ int err;
+
err = cpuhp_invoke_callback(cpu, state, bringup, NULL, NULL);
- if (err)
+ if (!err)
+ continue;
+
+ if (no_fail) {
+ pr_warn("CPU %u %s state %s (%d) failed (%d)\n",
+ cpu, bringup ? "UP" : "DOWN",
+ cpuhp_get_step(st->state)->name,
+ st->state, err);
+ ret = -1;
+ } else {
+ ret = err;
break;
+ }
}

- return err;
+ return ret;
+}
+
+static inline int cpuhp_invoke_callback_range(bool bringup,
+ unsigned int cpu,
+ struct cpuhp_cpu_state *st,
+ enum cpuhp_state target)
+{
+ return _cpuhp_invoke_callback_range(bringup, cpu, st, target, false);
+}
+
+static inline void
+cpuhp_invoke_callback_range_nofail(bool bringup,
+ unsigned int cpu,
+ struct cpuhp_cpu_state *st,
+ enum cpuhp_state target)
+{
+ WARN_ON_ONCE(_cpuhp_invoke_callback_range(bringup, cpu, st, target, true));
}

static inline bool can_rollback_cpu(struct cpuhp_cpu_state *st)
@@ -999,7 +1030,6 @@ static int take_cpu_down(void *_param)
struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
int err, cpu = smp_processor_id();
- int ret;

/* Ensure this CPU doesn't handle any more interrupts. */
err = __cpu_disable();
@@ -1012,13 +1042,11 @@ static int take_cpu_down(void *_param)
*/
WARN_ON(st->state != (CPUHP_TEARDOWN_CPU - 1));

- /* Invoke the former CPU_DYING callbacks */
- ret = cpuhp_invoke_callback_range(false, cpu, st, target);
-
/*
+ * Invoke the former CPU_DYING callbacks
* DYING must not fail!
*/
- WARN_ON_ONCE(ret);
+ cpuhp_invoke_callback_range_nofail(false, cpu, st, target);

/* Give up timekeeping duties */
tick_handover_do_timer();
@@ -1296,16 +1324,14 @@ void notify_cpu_starting(unsigned int cpu)
{
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
- int ret;

rcu_cpu_starting(cpu); /* Enables RCU usage on this CPU. */
cpumask_set_cpu(cpu, &cpus_booted_once_mask);
- ret = cpuhp_invoke_callback_range(true, cpu, st, target);

/*
* STARTING must not fail!
*/
- WARN_ON_ONCE(ret);
+ cpuhp_invoke_callback_range_nofail(true, cpu, st, target);
}

/*
--
2.36.1.124.g0e6072fb45-goog