[RFC][PATCH] cpu_pm: Remove RCU abuse

From: peterz
Date: Thu Sep 03 2020 - 10:59:49 EST


On Wed, Sep 02, 2020 at 05:58:55PM +0200, Ulf Hansson wrote:
> On Wed, 2 Sep 2020 at 14:14, <peterz@xxxxxxxxxxxxx> wrote:
> >
> > On Wed, Sep 02, 2020 at 09:03:37AM +0200, Ulf Hansson wrote:
> > > Lots of cpuidle drivers are using CPU_PM notifiers (grep for
> > > cpu_pm_enter and you will see) from their idlestates ->enter()
> > > callbacks. And for those we are already calling
> > > rcu_irq_enter_irqson|off() in cpu_pm_notify() when firing them.
> >
> > Yeah, that particular trainwreck is on my todo list already ... then
> > again, that list is forever overflowing.
> >
> > I'm thinking cpu_pm_unregister_notifier() is not a common thing? The few
> > I looked at seem to suggest 'never' is a good approximation.
>
> The trend is that drivers are turning into regular modules that may
> also need to manage "->remove()", which may mean unregistering the
> notifier. Of course, I don't know for sure whether that becomes a
> problem, but it seems quite limiting.

You can pin modules; once they're loaded, they can never be removed
again.

Anyway, the below should 'work', I think.

---
diff --git a/kernel/cpu_pm.c b/kernel/cpu_pm.c
index f7e1d0eccdbc..72804e0883d5 100644
--- a/kernel/cpu_pm.c
+++ b/kernel/cpu_pm.c
@@ -12,21 +12,18 @@
#include <linux/notifier.h>
#include <linux/spinlock.h>
#include <linux/syscore_ops.h>
+#include <linux/cpu.h>
+#include <linux/smp.h>

-static ATOMIC_NOTIFIER_HEAD(cpu_pm_notifier_chain);
+static RAW_NOTIFIER_HEAD(cpu_pm_notifier_chain);
+static DEFINE_SPINLOCK(cpu_pm_lock);

static int cpu_pm_notify(enum cpu_pm_event event)
{
int ret;

- /*
- * atomic_notifier_call_chain has a RCU read critical section, which
- * could be disfunctional in cpu idle. Copy RCU_NONIDLE code to let
- * RCU know this.
- */
- rcu_irq_enter_irqson();
- ret = atomic_notifier_call_chain(&cpu_pm_notifier_chain, event, NULL);
- rcu_irq_exit_irqson();
+ lockdep_assert_irqs_disabled();
+ ret = raw_notifier_call_chain(&cpu_pm_notifier_chain, event, NULL);

return notifier_to_errno(ret);
}
@@ -35,9 +32,8 @@ static int cpu_pm_notify_robust(enum cpu_pm_event event_up, enum cpu_pm_event ev
{
int ret;

- rcu_irq_enter_irqson();
- ret = atomic_notifier_call_chain_robust(&cpu_pm_notifier_chain, event_up, event_down, NULL);
- rcu_irq_exit_irqson();
+ lockdep_assert_irqs_disabled();
+ ret = raw_notifier_call_chain_robust(&cpu_pm_notifier_chain, event_up, event_down, NULL);

return notifier_to_errno(ret);
}
@@ -54,10 +50,28 @@ static int cpu_pm_notify_robust(enum cpu_pm_event event_up, enum cpu_pm_event ev
*/
int cpu_pm_register_notifier(struct notifier_block *nb)
{
- return atomic_notifier_chain_register(&cpu_pm_notifier_chain, nb);
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&cpu_pm_lock, flags);
+ ret = raw_notifier_chain_register(&cpu_pm_notifier_chain, nb);
+ spin_unlock_irqrestore(&cpu_pm_lock, flags);
+
+ return ret;
}
EXPORT_SYMBOL_GPL(cpu_pm_register_notifier);

+static bool __is_idle_cpu(int cpu, void *info)
+{
+ /*
+ * Racy as heck, however if we fail to see an idle task, it must be
+ * after we removed our element, so all is fine.
+ */
+ return is_idle_task(curr_task(cpu));
+}
+
+static void __nop_func(void *arg) { }
+
/**
* cpu_pm_unregister_notifier - unregister a driver with cpu_pm
* @nb: notifier block to be unregistered
@@ -69,7 +83,30 @@ EXPORT_SYMBOL_GPL(cpu_pm_register_notifier);
*/
int cpu_pm_unregister_notifier(struct notifier_block *nb)
{
- return atomic_notifier_chain_unregister(&cpu_pm_notifier_chain, nb);
+ unsigned long flags;
+ int ret, cpu;
+
+ spin_lock_irqsave(&cpu_pm_lock, flags);
+ ret = raw_notifier_chain_unregister(&cpu_pm_notifier_chain, nb);
+ spin_unlock_irqrestore(&cpu_pm_lock, flags);
+
+ /*
+ * Orders the removal above vs the __is_idle_cpu() test below. Matches
+ * schedule() switching to the idle task.
+ *
+ * Ensures that if we miss an idle task, it must be after the removal.
+ */
+ smp_mb();
+
+ /*
+ * IPI all idle CPUs, this guarantees that no CPU is currently
+ * iterating the notifier list.
+ */
+ cpus_read_lock();
+ on_each_cpu_cond(__is_idle_cpu, __nop_func, NULL, 1);
+ cpus_read_unlock();
+
+ return ret;
}
EXPORT_SYMBOL_GPL(cpu_pm_unregister_notifier);