[PATCH v2 4/5] seqlock: seqcount_LOCKNAME_t: Introduce PREEMPT_RT support

From: Ahmed S. Darwish
Date: Fri Sep 04 2020 - 11:32:45 EST


Preemption must be disabled before entering a sequence counter write
side critical section. Otherwise the read side section can preempt the
write side section and spin for the entire scheduler tick. If that
reader belongs to a real-time scheduling class, it can spin forever and
the kernel will livelock.
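
For reference, a seqcount_t reader classically spins until the sequence
count is even, then re-checks it after loading the protected data. A
simplified sketch of that pattern, with s the seqcount_t and the
lockdep/KCSAN instrumentation of the real read_seqcount_begin() and
read_seqcount_retry() omitted:

  unsigned seq;

  for (;;) {
          seq = READ_ONCE(s->sequence);
          if (unlikely(seq & 1)) {        /* odd: write in progress */
                  cpu_relax();            /* <-- the spin (and, with a
                                           * preempted writer, the
                                           * livelock) happens here  */
                  continue;
          }
          smp_rmb();
          /* ... load the seqcount-protected data ... */
          smp_rmb();
          if (likely(READ_ONCE(s->sequence) == seq))
                  break;                  /* sequence unchanged: the
                                           * loaded data is consistent */
  }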

Disabling preemption cannot be done for PREEMPT_RT though: it can lead
to higher latencies, and the write side sections will not be able to
acquire locks which become sleeping locks (e.g. spinlock_t).
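
For example, on PREEMPT_RT spinlock_t is a sleeping lock, so a write
side section that ran with preemption disabled could no longer take
one. A hypothetical sketch (data_seq and other_lock are made-up names,
not part of this series):

  write_seqcount_begin(&data_seq);  /* implies preempt_disable() for
                                     * preemptible associated locks
                                     * on !PREEMPT_RT               */
  spin_lock(&other_lock);           /* sleeping lock on RT: invalid
                                     * with preemption disabled     */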

To remain preemptible while avoiding a possible livelock caused by the
reader preempting the writer, use a different technique: let the reader
detect if a seqcount_LOCKNAME_t writer is in progress. If that's the
case, acquire then release the associated LOCKNAME writer serialization
lock. This will allow any possibly-preempted writer to make progress
until the end of its writer serialization lock critical section.

Implement this lock-unlock technique for all seqcount_LOCKNAME_t with
an associated (PREEMPT_RT) sleeping lock.
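
On PREEMPT_RT, the generated read side sequence accessor for e.g.
seqcount_spinlock_t then behaves roughly as below (a hand expansion of
the SEQCOUNT_LOCKNAME() macro in this patch, simplified):

  static __always_inline unsigned
  __seqprop_seqcount_spinlock_sequence(const seqcount_spinlock_t *s)
  {
          unsigned seq = READ_ONCE(s->seqcount.sequence);

          if (unlikely(seq & 1)) {        /* writer in progress */
                  spin_lock(s->lock);     /* blocks on, and PI-boosts,
                                           * the possibly-preempted
                                           * writer                  */
                  spin_unlock(s->lock);
                  seq = READ_ONCE(s->seqcount.sequence);
          }
          return seq;
  }

Instead of spinning on an odd count, the RT reader thus sleeps on the
writer serialization lock, and priority inheritance lets the preempted
writer run to the end of its lock critical section.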

Link: https://lkml.kernel.org/r/159708609435.2571.13948681727529247231.tglx@nanos
Link: https://lkml.kernel.org/r/20200519214547.352050-1-a.darwish@xxxxxxxxxxxxx
References: 55f3560df975 ("seqlock: Extend seqcount API with associated locks")
Signed-off-by: Ahmed S. Darwish <a.darwish@xxxxxxxxxxxxx>
---
include/linux/seqlock.h | 72 +++++++++++++++++++++++++++++++++--------
1 file changed, 58 insertions(+), 14 deletions(-)

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index ed1c6c0ff8bb..6ac5a63fc536 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -17,6 +17,7 @@
#include <linux/kcsan-checks.h>
#include <linux/lockdep.h>
#include <linux/mutex.h>
+#include <linux/ww_mutex.h>
#include <linux/preempt.h>
#include <linux/spinlock.h>

@@ -131,7 +132,23 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
* See Documentation/locking/seqlock.rst
*/

-#ifdef CONFIG_LOCKDEP
+/*
+ * For PREEMPT_RT, seqcount_LOCKNAME_t write side critical sections cannot
+ * disable preemption. It can lead to higher latencies, and the write side
+ * sections will not be able to acquire locks which become sleeping locks
+ * (e.g. spinlock_t).
+ *
+ * To remain preemptible while avoiding a possible livelock caused by the
+ * reader preempting the writer, use a different technique: let the reader
+ * detect if a seqcount_LOCKNAME_t writer is in progress. If that is the
+ * case, acquire then release the associated LOCKNAME writer serialization
+ * lock. This will allow any possibly-preempted writer to make progress
+ * until the end of its writer serialization lock critical section.
+ *
+ * This lock-unlock technique must be implemented for all PREEMPT_RT
+ * sleeping locks. See Documentation/locking/locktypes.rst
+ */
+#if defined(CONFIG_LOCKDEP) || defined(CONFIG_PREEMPT_RT)
#define __SEQ_LOCK(expr) expr
#else
#define __SEQ_LOCK(expr)
@@ -162,10 +179,12 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
*
* @lockname: "LOCKNAME" part of seqcount_LOCKNAME_t
* @locktype: LOCKNAME canonical C data type
- * @preemptible: preemptibility of above lockname
+ * @preemptible: preemptibility of above locktype
* @lockmember: argument for lockdep_assert_held()
+ * @lockbase: associated lock release function (prefix only)
+ * @lock_acquire: associated lock acquisition function (full call)
*/
-#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockmember) \
+#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockmember, lockbase, lock_acquire) \
typedef struct seqcount_##lockname { \
seqcount_t seqcount; \
__SEQ_LOCK(locktype *lock); \
@@ -187,7 +206,23 @@ __seqprop_seqcount_##lockname##_ptr(seqcount_##lockname##_t *s) \
static __always_inline unsigned \
__seqprop_seqcount_##lockname##_sequence(const seqcount_##lockname##_t *s)\
{ \
- return READ_ONCE(s->seqcount.sequence); \
+ unsigned seq = READ_ONCE(s->seqcount.sequence); \
+ \
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) \
+ return seq; \
+ \
+ if (preemptible && unlikely(seq & 1)) { \
+ __SEQ_LOCK(lock_acquire); \
+ __SEQ_LOCK(lockbase##_unlock(s->lock)); \
+ \
+ /* \
+ * Re-read the sequence counter since the (possibly \
+ * preempted) writer made progress. \
+ */ \
+ seq = READ_ONCE(s->seqcount.sequence); \
+ } \
+ \
+ return seq; \
} \
\
static __always_inline bool \
@@ -226,11 +261,13 @@ static inline void __seqprop_seqcount_assert(const seqcount_t *s)
lockdep_assert_preemption_disabled();
}

-SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t, false, s->lock)
-SEQCOUNT_LOCKNAME(spinlock, spinlock_t, false, s->lock)
-SEQCOUNT_LOCKNAME(rwlock, rwlock_t, false, s->lock)
-SEQCOUNT_LOCKNAME(mutex, struct mutex, true, s->lock)
-SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base)
+#define __SEQ_RT IS_ENABLED(CONFIG_PREEMPT_RT)
+
+SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t, false, s->lock, raw_spin, raw_spin_lock(s->lock))
+SEQCOUNT_LOCKNAME(spinlock, spinlock_t, __SEQ_RT, s->lock, spin, spin_lock(s->lock))
+SEQCOUNT_LOCKNAME(rwlock, rwlock_t, __SEQ_RT, s->lock, read, read_lock(s->lock))
+SEQCOUNT_LOCKNAME(mutex, struct mutex, true, s->lock, mutex, mutex_lock(s->lock))
+SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mutex, ww_mutex_lock(s->lock, NULL))

/**
* SEQCNT_LOCKNAME_ZERO - static initializer for seqcount_LOCKNAME_t
@@ -406,13 +443,20 @@ static inline int read_seqcount_t_retry(const seqcount_t *s, unsigned start)
return __read_seqcount_t_retry(s, start);
}

+/*
+ * Enforce non-preemptibility for all seqcount_LOCKNAME_t writers. Don't
+ * do it for PREEMPT_RT, for the reasons outlined in the __SEQ_LOCK() comment.
+ */
+#define __seq_enforce_writer_non_preemptibility(s) \
+ (!IS_ENABLED(CONFIG_PREEMPT_RT) && __seqcount_lock_preemptible(s))
+
/**
* raw_write_seqcount_begin() - start a seqcount_t write section w/o lockdep
* @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
*/
#define raw_write_seqcount_begin(s) \
do { \
- if (__seqcount_lock_preemptible(s)) \
+ if (__seq_enforce_writer_non_preemptibility(s)) \
preempt_disable(); \
\
raw_write_seqcount_t_begin(__seqcount_ptr(s)); \
@@ -433,7 +477,7 @@ static inline void raw_write_seqcount_t_begin(seqcount_t *s)
do { \
raw_write_seqcount_t_end(__seqcount_ptr(s)); \
\
- if (__seqcount_lock_preemptible(s)) \
+ if (__seq_enforce_writer_non_preemptibility(s)) \
preempt_enable(); \
} while (0)

@@ -456,7 +500,7 @@ static inline void raw_write_seqcount_t_end(seqcount_t *s)
do { \
__seqcount_assert_lock_held(s); \
\
- if (__seqcount_lock_preemptible(s)) \
+ if (__seq_enforce_writer_non_preemptibility(s)) \
preempt_disable(); \
\
write_seqcount_t_begin_nested(__seqcount_ptr(s), subclass); \
@@ -483,7 +527,7 @@ static inline void write_seqcount_t_begin_nested(seqcount_t *s, int subclass)
do { \
__seqcount_assert_lock_held(s); \
\
- if (__seqcount_lock_preemptible(s)) \
+ if (__seq_enforce_writer_non_preemptibility(s)) \
preempt_disable(); \
\
write_seqcount_t_begin(__seqcount_ptr(s)); \
@@ -504,7 +548,7 @@ static inline void write_seqcount_t_begin(seqcount_t *s)
do { \
write_seqcount_t_end(__seqcount_ptr(s)); \
\
- if (__seqcount_lock_preemptible(s)) \
+ if (__seq_enforce_writer_non_preemptibility(s)) \
preempt_enable(); \
} while (0)

--
2.28.0