Re: [RFC][PATCH] sched: Start stopper early

From: Oleg Nesterov
Date: Thu Oct 08 2015 - 14:51:07 EST


On 10/08, Oleg Nesterov wrote:
>
> To avoid the confusion, let me repeat that I am not arguing with
> this change, perhaps it makes sense too.
>
> But unless I missed something it is not really correct and can't
> fix the problem. So I still think the series I sent should be
> applied first.

...

> But note that kthread_unpark() will only wake the stopper thread up.
>
> cpu_stopper->enabled is still false, and it will be false until
> smpboot_unpark_thread() calls ->pre_unpark() later. And this means
> that stop_two_cpus()->cpu_stop_queue_work() can silently fail until
> then. So I don't this patch can fix the problem.

So I'd suggest something like the patch below (uncompiled/untested)
on top of the series I sent.

Note also that (at least imo) it looks like a cleanup, and even
->selfparking becomes more consistent.

What do you think?

Oleg.


--- a/include/linux/smpboot.h
+++ b/include/linux/smpboot.h
@@ -40,7 +40,6 @@ struct smp_hotplug_thread {
void (*cleanup)(unsigned int cpu, bool online);
void (*park)(unsigned int cpu);
void (*unpark)(unsigned int cpu);
- void (*pre_unpark)(unsigned int cpu);
bool selfparking;
const char *thread_comm;
};
diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index 7b76362..0adedca 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -34,6 +34,7 @@ void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg);
int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg);
void stop_machine_park(int cpu);
+void stop_machine_unpark(int cpu);

#else /* CONFIG_SMP */

--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -221,9 +221,8 @@ static void smpboot_unpark_thread(struct smp_hotplug_thread *ht, unsigned int cp
{
struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu);

- if (ht->pre_unpark)
- ht->pre_unpark(cpu);
- kthread_unpark(tsk);
+ if (!ht->selfparking)
+ kthread_unpark(tsk);
}

void smpboot_unpark_threads(unsigned int cpu)
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -513,6 +513,14 @@ static void cpu_stop_unpark(unsigned int cpu)
spin_unlock_irq(&stopper->lock);
}

+void stop_machine_unpark(int cpu)
+{
+ struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
+
+ cpu_stop_unpark(cpu);
+ kthread_unpark(stopper->thread);
+}
+
static struct smp_hotplug_thread cpu_stop_threads = {
.store = &cpu_stopper.thread,
.thread_should_run = cpu_stop_should_run,
@@ -521,7 +529,6 @@ static struct smp_hotplug_thread cpu_stop_threads = {
.create = cpu_stop_create,
.setup = cpu_stop_unpark,
.park = cpu_stop_park,
- .pre_unpark = cpu_stop_unpark,
.selfparking = true,
};

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5515,11 +5515,14 @@ static void set_cpu_rq_start_time(void)
static int sched_cpu_active(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
+ int cpu = (long)hcpu;
+
switch (action & ~CPU_TASKS_FROZEN) {
case CPU_STARTING:
set_cpu_rq_start_time();
return NOTIFY_OK;
case CPU_ONLINE:
+ stop_machine_unpark(cpu);
/*
* At this point a starting CPU has marked itself as online via
* set_cpu_online(). But it might not yet have marked itself
@@ -5528,7 +5531,7 @@ static int sched_cpu_active(struct notif
* Thus, fall-through and help the starting CPU along.
*/
case CPU_DOWN_FAILED:
- set_cpu_active((long)hcpu, true);
+ set_cpu_active(cpu, true);
return NOTIFY_OK;
default:
return NOTIFY_DONE;

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/