[PATCH v3 1/2] cpu/hotplug: Remove the 'cpu' member of cpuhp_cpu_state

From: Steven Price
Date: Mon Apr 11 2022 - 11:22:58 EST


Currently the setting of the 'cpu' member of struct cpuhp_cpu_state in
cpuhp_create() is too late as it is used earlier in _cpu_up(). If the
kzalloc_node() in __smpboot_create_thread() fails then the rollback will
be done with st->cpu==0 causing CPU0 to be erroneously set to be dying,
causing the scheduler to get mightily confused and throw its toys out of
the pram.

However the cpu number is actually available directly, so simply remove
the 'cpu' member and avoid the problem in the first place.

Fixes: 2ea46c6fc945 ("cpumask/hotplug: Fix cpu_dying() state tracking")
Signed-off-by: Steven Price <steven.price@xxxxxxx>
---
kernel/cpu.c | 36 ++++++++++++++++++------------------
1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/kernel/cpu.c b/kernel/cpu.c
index 407a2568f35e..5601216eb51b 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -70,7 +70,6 @@ struct cpuhp_cpu_state {
bool rollback;
bool single;
bool bringup;
- int cpu;
struct hlist_node *node;
struct hlist_node *last;
enum cpuhp_state cb_state;
@@ -474,7 +473,7 @@ static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
#endif

static inline enum cpuhp_state
-cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target)
+cpuhp_set_state(int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target)
{
enum cpuhp_state prev_state = st->state;
bool bringup = st->state < target;
@@ -485,14 +484,15 @@ cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target)
st->target = target;
st->single = false;
st->bringup = bringup;
- if (cpu_dying(st->cpu) != !bringup)
- set_cpu_dying(st->cpu, !bringup);
+ if (cpu_dying(cpu) != !bringup)
+ set_cpu_dying(cpu, !bringup);

return prev_state;
}

static inline void
-cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state)
+cpuhp_reset_state(int cpu, struct cpuhp_cpu_state *st,
+ enum cpuhp_state prev_state)
{
bool bringup = !st->bringup;

@@ -519,8 +519,8 @@ cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state)
}

st->bringup = bringup;
- if (cpu_dying(st->cpu) != !bringup)
- set_cpu_dying(st->cpu, !bringup);
+ if (cpu_dying(cpu) != !bringup)
+ set_cpu_dying(cpu, !bringup);
}

/* Regular hotplug invocation of the AP hotplug thread */
@@ -540,15 +540,16 @@ static void __cpuhp_kick_ap(struct cpuhp_cpu_state *st)
wait_for_ap_thread(st, st->bringup);
}

-static int cpuhp_kick_ap(struct cpuhp_cpu_state *st, enum cpuhp_state target)
+static int cpuhp_kick_ap(int cpu, struct cpuhp_cpu_state *st,
+ enum cpuhp_state target)
{
enum cpuhp_state prev_state;
int ret;

- prev_state = cpuhp_set_state(st, target);
+ prev_state = cpuhp_set_state(cpu, st, target);
__cpuhp_kick_ap(st);
if ((ret = st->result)) {
- cpuhp_reset_state(st, prev_state);
+ cpuhp_reset_state(cpu, st, prev_state);
__cpuhp_kick_ap(st);
}

@@ -580,7 +581,7 @@ static int bringup_wait_for_ap(unsigned int cpu)
if (st->target <= CPUHP_AP_ONLINE_IDLE)
return 0;

- return cpuhp_kick_ap(st, st->target);
+ return cpuhp_kick_ap(cpu, st, st->target);
}

static int bringup_cpu(unsigned int cpu)
@@ -703,7 +704,7 @@ static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
ret, cpu, cpuhp_get_step(st->state)->name,
st->state);

- cpuhp_reset_state(st, prev_state);
+ cpuhp_reset_state(cpu, st, prev_state);
if (can_rollback_cpu(st))
WARN_ON(cpuhp_invoke_callback_range(false, cpu, st,
prev_state));
@@ -720,7 +721,6 @@ static void cpuhp_create(unsigned int cpu)

init_completion(&st->done_up);
init_completion(&st->done_down);
- st->cpu = cpu;
}

static int cpuhp_should_run(unsigned int cpu)
@@ -874,7 +874,7 @@ static int cpuhp_kick_ap_work(unsigned int cpu)
cpuhp_lock_release(true);

trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work);
- ret = cpuhp_kick_ap(st, st->target);
+ ret = cpuhp_kick_ap(cpu, st, st->target);
trace_cpuhp_exit(cpu, st->state, prev_state, ret);

return ret;
@@ -1106,7 +1106,7 @@ static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
ret, cpu, cpuhp_get_step(st->state)->name,
st->state);

- cpuhp_reset_state(st, prev_state);
+ cpuhp_reset_state(cpu, st, prev_state);

if (st->state < prev_state)
WARN_ON(cpuhp_invoke_callback_range(true, cpu, st,
@@ -1133,7 +1133,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,

cpuhp_tasks_frozen = tasks_frozen;

- prev_state = cpuhp_set_state(st, target);
+ prev_state = cpuhp_set_state(cpu, st, target);
/*
* If the current CPU state is in the range of the AP hotplug thread,
* then we need to kick the thread.
@@ -1164,7 +1164,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
ret = cpuhp_down_callbacks(cpu, st, target);
if (ret && st->state < prev_state) {
if (st->state == CPUHP_TEARDOWN_CPU) {
- cpuhp_reset_state(st, prev_state);
+ cpuhp_reset_state(cpu, st, prev_state);
__cpuhp_kick_ap(st);
} else {
WARN(1, "DEAD callback error for CPU%d", cpu);
@@ -1351,7 +1351,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)

cpuhp_tasks_frozen = tasks_frozen;

- cpuhp_set_state(st, target);
+ cpuhp_set_state(cpu, st, target);
/*
* If the current CPU state is in the range of the AP hotplug thread,
* then we need to kick the thread once more.
--
2.25.1