Re: [PATCH] cpu/hotplug: Wait for cpu_hotplug to be enabled in cpu_up/down

From: Alexander Sverdlin
Date: Fri Jan 24 2020 - 06:51:35 EST


Hi!

On 24/01/2020 09:18, Matija Glavinic Pecotic wrote:
> cpu hotplug may be disabled via cpu_hotplug_enable/cpu_hotplug_disable.
> When disabled, cpu_down and cpu_up will fail with -EBUSY. Users of the
> cpu_up/cpu_down should handle this situation as this is mostly temporal
> disablement and exception should be made for EBUSY, assuming that EBUSY
> always stands for this situation and is worth repeating execution. One
> of the users of cpu_hotplug_enable/disable is pci_device_probe yielding
> errors on bringing cpu cores up/down if pci devices are getting probed.
>
> Problem was observed on x86 board by having partitioning of the system
> to RT/NRT cpu sets failing (of which part is to bring cpus down/up via
> sysfs) if pci devices would be getting probed at the same time. This is
> confusing for userspace as dependency to pci devices is not clear.
>
> Fix this behavior by waiting for cpu hotplug to be ready. Return -EBUSY
> only after hotplugging was not enabled for about 10 seconds.
>
> Fixes: 1ddd45f8d76f ("PCI: Use cpu_hotplug_disable() instead of get_online_cpus()")
> Signed-off-by: Matija Glavinic Pecotic <matija.glavinic-pecotic.ext@xxxxxxxxx>

Reviewed-by: Alexander Sverdlin <alexander.sverdlin@xxxxxxxxx>

> ---
> kernel/cpu.c | 50 ++++++++++++++++++++++++++++++++++++++++----------
> 1 file changed, 40 insertions(+), 10 deletions(-)
>
> diff --git a/kernel/cpu.c b/kernel/cpu.c
> index 4dc279e..2e06ca9 100644
> --- a/kernel/cpu.c
> +++ b/kernel/cpu.c
> @@ -31,6 +31,7 @@
> #include <linux/relay.h>
> #include <linux/slab.h>
> #include <linux/percpu-rwsem.h>
> +#include <linux/wait.h>
>
> #include <trace/events/power.h>
> #define CREATE_TRACE_POINTS
> @@ -278,11 +279,22 @@ void cpu_maps_update_done(void)
> }
>
> /*
> - * If set, cpu_up and cpu_down will return -EBUSY and do nothing.
> + * If set, cpu_up and cpu_down will retry for cpu_hotplug_retries and
> + * eventually return -EBUSY if unsuccessful.
> * Should always be manipulated under cpu_add_remove_lock
> */
> static int cpu_hotplug_disabled;
>
> +/*
> + * waitqueue for waiting on cpu_hotplug_disabled
> + */
> +static DECLARE_WAIT_QUEUE_HEAD(wait_cpu_hp_enabled);
> +
> +/*
> + * Retries for cpu_hotplug to be enabled by cpu_up/cpu_down.
> + */
> +static int cpu_hotplug_retries = 10;
> +
> #ifdef CONFIG_HOTPLUG_CPU
>
> DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock);
> @@ -341,7 +353,7 @@ static void lockdep_release_cpus_lock(void)
>
> /*
> * Wait for currently running CPU hotplug operations to complete (if any) and
> - * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
> + * briefly disable CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
> * the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the
> * hotplug path before performing hotplug operations. So acquiring that lock
> * guarantees mutual exclusion from any currently running hotplug operations.
> @@ -366,6 +378,7 @@ void cpu_hotplug_enable(void)
> cpu_maps_update_begin();
> __cpu_hotplug_enable();
> cpu_maps_update_done();
> + wake_up(&wait_cpu_hp_enabled);
> }
> EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
>
> @@ -1044,11 +1057,21 @@ static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target)
>
> static int do_cpu_down(unsigned int cpu, enum cpuhp_state target)
> {
> - int err;
> + int err = -EBUSY, retries = cpu_hotplug_retries;
>
> - cpu_maps_update_begin();
> - err = cpu_down_maps_locked(cpu, target);
> - cpu_maps_update_done();
> + while (retries--) {
> + wait_event_timeout(wait_cpu_hp_enabled,
> + !cpu_hotplug_disabled,
> + HZ);
> + cpu_maps_update_begin();
> + if (cpu_hotplug_disabled) {
> + cpu_maps_update_done();
> + continue;
> + }
> + err = _cpu_down(cpu, 0, target);
> + cpu_maps_update_done();
> + break;
> + }
> return err;
> }
>
> @@ -1166,7 +1189,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
>
> static int do_cpu_up(unsigned int cpu, enum cpuhp_state target)
> {
> - int err = 0;
> + int err = 0, retries = cpu_hotplug_retries;
>
> if (!cpu_possible(cpu)) {
> pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n",
> @@ -1181,9 +1204,16 @@ static int do_cpu_up(unsigned int cpu, enum cpuhp_state target)
> if (err)
> return err;
>
> - cpu_maps_update_begin();
> -
> - if (cpu_hotplug_disabled) {
> + while (--retries) {
> + wait_event_timeout(wait_cpu_hp_enabled,
> + !cpu_hotplug_disabled,
> + HZ);
> + cpu_maps_update_begin();
> + if (!cpu_hotplug_disabled)
> + break;
> + cpu_maps_update_done();
> + }
> + if (!retries) {
> err = -EBUSY;
> goto out;
> }
>

--
Best regards,
Alexander Sverdlin.