[tip:sched/core] kernel/cpu: Allow non-zero CPU to be primary for suspend / kexec freeze

From: tip-bot for Nicholas Piggin
Date: Fri May 03 2019 - 13:47:41 EST


Commit-ID: 9ca12ac04bb7d7cfb28aa549dcd3d15761f15543
Gitweb: https://git.kernel.org/tip/9ca12ac04bb7d7cfb28aa549dcd3d15761f15543
Author: Nicholas Piggin <npiggin@xxxxxxxxx>
AuthorDate: Thu, 11 Apr 2019 13:34:46 +1000
Committer: Ingo Molnar <mingo@xxxxxxxxxx>
CommitDate: Fri, 3 May 2019 19:42:58 +0200

kernel/cpu: Allow non-zero CPU to be primary for suspend / kexec freeze

This patch provides an arch option, ARCH_SUSPEND_NONZERO_CPU, to
opt-in to allowing suspend to occur on one of the housekeeping CPUs
rather than hardcoded CPU0.

This will allow CPU0 to be a nohz_full CPU with a later change.

It may be possible for platforms with hardware/firmware restrictions
on suspend/wake effectively support this by handing off the final
stage to CPU0 when kernel housekeeping is no longer required. Another
option is to make housekeeping / nohz_full mask dynamic at runtime,
but the complexity could not be justified at this time.

Signed-off-by: Nicholas Piggin <npiggin@xxxxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Cc: Frederic Weisbecker <fweisbec@xxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Rafael J . Wysocki <rafael.j.wysocki@xxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: linuxppc-dev@xxxxxxxxxxxxxxxx
Link: https://lkml.kernel.org/r/20190411033448.20842-4-npiggin@xxxxxxxxx
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
---
arch/powerpc/Kconfig | 4 ++++
include/linux/cpu.h | 7 ++++++-
kernel/cpu.c | 10 +++++++++-
kernel/power/Kconfig | 9 +++++++++
4 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 2d0be82c3061..bc98b0e37a10 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -318,6 +318,10 @@ config ARCH_SUSPEND_POSSIBLE
(PPC_85xx && !PPC_E500MC) || PPC_86xx || PPC_PSERIES \
|| 44x || 40x

+config ARCH_SUSPEND_NONZERO_CPU
+ def_bool y
+ depends on PPC_POWERNV || PPC_PSERIES
+
config PPC_DCR_NATIVE
bool

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 7ab2f09c0a14..73baab8535c1 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -140,7 +140,12 @@ extern void enable_nonboot_cpus(void);

static inline int suspend_disable_secondary_cpus(void)
{
- return freeze_secondary_cpus(0);
+ int cpu = 0;
+
+ if (IS_ENABLED(CONFIG_PM_SLEEP_SMP_NONZERO_CPU))
+ cpu = -1;
+
+ return freeze_secondary_cpus(cpu);
}
static inline void suspend_enable_secondary_cpus(void)
{
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 6754f3ecfd94..d1bf6e2b4752 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -9,6 +9,7 @@
#include <linux/notifier.h>
#include <linux/sched/signal.h>
#include <linux/sched/hotplug.h>
+#include <linux/sched/isolation.h>
#include <linux/sched/task.h>
#include <linux/sched/smt.h>
#include <linux/unistd.h>
@@ -1199,8 +1200,15 @@ int freeze_secondary_cpus(int primary)
int cpu, error = 0;

cpu_maps_update_begin();
- if (!cpu_online(primary))
+ if (primary == -1) {
primary = cpumask_first(cpu_online_mask);
+ if (!housekeeping_cpu(primary, HK_FLAG_TIMER))
+ primary = housekeeping_any_cpu(HK_FLAG_TIMER);
+ } else {
+ if (!cpu_online(primary))
+ primary = cpumask_first(cpu_online_mask);
+ }
+
/*
* We take down all of the non-boot CPUs in one shot to avoid races
* with the userspace trying to use the CPU hotplug at the same time
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index f8fe57d1022e..9bbaaab14b36 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -114,6 +114,15 @@ config PM_SLEEP_SMP
depends on PM_SLEEP
select HOTPLUG_CPU

+config PM_SLEEP_SMP_NONZERO_CPU
+ def_bool y
+ depends on PM_SLEEP_SMP
+ depends on ARCH_SUSPEND_NONZERO_CPU
+ ---help---
+ If an arch can suspend (for suspend, hibernate, kexec, etc) on a
+ non-zero numbered CPU, it may define ARCH_SUSPEND_NONZERO_CPU. This
+ will allow nohz_full mask to include CPU0.
+
config PM_AUTOSLEEP
bool "Opportunistic sleep"
depends on PM_SLEEP