[PATCH 1/4] sched/wakeup: Strengthen current_save_and_set_rtlock_wait_state()

From: Peter Zijlstra
Date: Thu Sep 09 2021 - 07:04:19 EST


While looking at current_save_and_set_rtlock_wait_state() I'm thinking
it really ought to use smp_store_mb(), because something like:

	current_save_and_set_rtlock_wait_state();
	for (;;) {
		if (try_lock())
			break;

		raw_spin_unlock_irq(&lock->wait_lock);
		schedule();
		raw_spin_lock_irq(&lock->wait_lock);

		set_current_state(TASK_RTLOCK_WAIT);
	}
	current_restore_rtlock_saved_state();

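which is the advertised usage in the comment, is actually broken,
since try_lock() will only need a load-acquire in general and that
load could be re-ordered against the preceding state store, which
could lead to a missed wakeup -> BAD (tm). Using smp_store_mb() puts
a full barrier after the state store and so orders it before the
try_lock() load.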

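While there, make both current_save_and_set_rtlock_wait_state() and
current_restore_rtlock_saved_state() consistent with the IRQ usage in
set_special_state(): take pi_lock with raw_spin_lock_irqsave() /
raw_spin_unlock_irqrestore() instead of asserting that IRQs are
already disabled.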

Fixes: 5f220be21418 ("sched/wakeup: Prepare for RT sleeping spin/rwlocks")
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
include/linux/sched.h | 19 +++++++++++--------
1 file changed, 11 insertions(+), 8 deletions(-)

--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -245,7 +245,8 @@ struct task_group;
* if (try_lock())
* break;
* raw_spin_unlock_irq(&lock->wait_lock);
- * schedule_rtlock();
+ * if (!cond)
+ * schedule_rtlock();
* raw_spin_lock_irq(&lock->wait_lock);
* set_current_state(TASK_RTLOCK_WAIT);
* }
@@ -253,22 +254,24 @@ struct task_group;
*/
#define current_save_and_set_rtlock_wait_state() \
do { \
- lockdep_assert_irqs_disabled(); \
- raw_spin_lock(&current->pi_lock); \
+ unsigned long flags; /* may shadow */ \
+ \
+ raw_spin_lock_irqsave(&current->pi_lock, flags); \
current->saved_state = current->__state; \
debug_rtlock_wait_set_state(); \
- WRITE_ONCE(current->__state, TASK_RTLOCK_WAIT); \
- raw_spin_unlock(&current->pi_lock); \
+ smp_store_mb(current->__state, TASK_RTLOCK_WAIT); \
+ raw_spin_unlock_irqrestore(&current->pi_lock, flags); \
} while (0);

#define current_restore_rtlock_saved_state() \
do { \
- lockdep_assert_irqs_disabled(); \
- raw_spin_lock(&current->pi_lock); \
+ unsigned long flags; /* may shadow */ \
+ \
+ raw_spin_lock_irqsave(&current->pi_lock, flags); \
debug_rtlock_wait_restore_state(); \
WRITE_ONCE(current->__state, current->saved_state); \
current->saved_state = TASK_RUNNING; \
- raw_spin_unlock(&current->pi_lock); \
+ raw_spin_unlock_irqrestore(&current->pi_lock, flags); \
} while (0);

#define get_current_state() READ_ONCE(current->__state)