Re: [PATCH v8 16/19] locking/rwsem: Guard against making count negative

From: Peter Zijlstra
Date: Tue Jun 11 2019 - 09:32:31 EST


On Tue, Jun 11, 2019 at 03:11:31PM +0200, Peter Zijlstra wrote:
> On Mon, May 20, 2019 at 04:59:15PM -0400, Waiman Long wrote:
>
> > +static inline long rwsem_read_trylock(struct rw_semaphore *sem, long *cnt)
> > +{
> > + long adjustment = -RWSEM_READER_BIAS;
> > +
> > + *cnt = atomic_long_fetch_add_acquire(RWSEM_READER_BIAS, &sem->count);
>
> I'm thinking we'd actually want add_return_acquire() here.
>
> > + if (unlikely(*cnt < 0)) {
> > + atomic_long_add(-RWSEM_READER_BIAS, &sem->count);
> > + adjustment = 0;
> > + }
> > + return adjustment;
> > +}
>
> > @@ -1271,9 +1332,10 @@ static struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
> > */
> > inline void __down_read(struct rw_semaphore *sem)
> > {
> > + long tmp, adjustment = rwsem_read_trylock(sem, &tmp);
> > +
> > + if (unlikely(tmp & RWSEM_READ_FAILED_MASK)) {
> > + rwsem_down_read_slowpath(sem, TASK_UNINTERRUPTIBLE, adjustment);
> > DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
> > } else {
> > rwsem_set_reader_owned(sem);
> > @@ -1282,9 +1344,11 @@ inline void __down_read(struct rw_semaphore *sem)
> >
> > static inline int __down_read_killable(struct rw_semaphore *sem)
> > {
> > + long tmp, adjustment = rwsem_read_trylock(sem, &tmp);
> > +
> > + if (unlikely(tmp & RWSEM_READ_FAILED_MASK)) {
> > + if (IS_ERR(rwsem_down_read_slowpath(sem, TASK_KILLABLE,
> > + adjustment)))
> > return -EINTR;
> > DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
> > } else {
>
> I'm confused by the need for @tmp; isn't that returning the exact same
> state !adjustment is?

Argh.. READ_FAILED_MASK isn't just the MSB. Bah, this is confusing.
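Concretely (a throwaway userspace sketch, not kernel code; the bit values are
copied from the patch, the lock state below is made up):

	#include <stdio.h>

	#define RWSEM_WRITER_LOCKED	(1UL << 0)
	#define RWSEM_FLAG_WAITERS	(1UL << 1)
	#define RWSEM_FLAG_HANDOFF	(1UL << 2)
	#define RWSEM_READER_SHIFT	8
	#define RWSEM_READER_BIAS	(1UL << RWSEM_READER_SHIFT)
	#define RWSEM_READ_FAILED_MASK	(RWSEM_WRITER_LOCKED|RWSEM_FLAG_WAITERS|\
					 RWSEM_FLAG_HANDOFF)

	int main(void)
	{
		/* two readers hold the lock, a writer is queued behind them */
		long count = 2 * RWSEM_READER_BIAS + RWSEM_FLAG_WAITERS;

		printf("count < 0                     : %d\n", count < 0);	/* 0 */
		printf("count & RWSEM_READ_FAILED_MASK: %lu\n",
		       count & RWSEM_READ_FAILED_MASK);				/* 2 */
		return 0;
	}

So "*cnt < 0" (i.e. !adjustment) only notices a count that has gone negative,
while the RWSEM_READ_FAILED_MASK test also notices the writer/waiters/handoff
bits; the two really are different conditions.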

Maybe something like so?

--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -116,13 +116,28 @@
#endif

/*
- * The definition of the atomic counter in the semaphore:
+ * On 64-bit architectures, the bit definitions of the count are:
*
- * Bit 0 - writer locked bit
- * Bit 1 - waiters present bit
- * Bit 2 - lock handoff bit
- * Bits 3-7 - reserved
- * Bits 8-X - 24-bit (32-bit) or 56-bit reader count
+ * Bit 0 - writer locked bit
+ * Bit 1 - waiters present bit
+ * Bit 2 - lock handoff bit
+ * Bits 3-7 - reserved
+ * Bits 8-62 - 55-bit reader count
+ * Bit 63 - read fail bit
+ *
+ * On 32-bit architectures, the bit definitions of the count are:
+ *
+ * Bit 0 - writer locked bit
+ * Bit 1 - waiters present bit
+ * Bit 2 - lock handoff bit
+ * Bits 3-7 - reserved
+ * Bits 8-30 - 23-bit reader count
+ * Bit 31 - read fail bit
+ *
+ * It is not likely that the most significant bit (read fail bit) will ever
+ * be set. This guard bit is still checked anyway in the down_read() fastpath
+ * just in case we need to use up more of the reader bits for other purposes
+ * in the future.
*
* atomic_long_fetch_add() is used to obtain reader lock, whereas
* atomic_long_cmpxchg() will be used to obtain writer lock.
@@ -139,6 +154,7 @@
#define RWSEM_WRITER_LOCKED (1UL << 0)
#define RWSEM_FLAG_WAITERS (1UL << 1)
#define RWSEM_FLAG_HANDOFF (1UL << 2)
+#define RWSEM_FLAG_READFAIL (1UL << (BITS_PER_LONG - 1))

#define RWSEM_READER_SHIFT 8
#define RWSEM_READER_BIAS (1UL << RWSEM_READER_SHIFT)
@@ -146,7 +162,7 @@
#define RWSEM_WRITER_MASK RWSEM_WRITER_LOCKED
#define RWSEM_LOCK_MASK (RWSEM_WRITER_MASK|RWSEM_READER_MASK)
#define RWSEM_READ_FAILED_MASK (RWSEM_WRITER_MASK|RWSEM_FLAG_WAITERS|\
- RWSEM_FLAG_HANDOFF)
+ RWSEM_FLAG_HANDOFF|RWSEM_FLAG_READFAIL)

/*
* All writes to owner are protected by WRITE_ONCE() to make sure that
@@ -254,6 +270,14 @@ static inline void rwsem_set_nonspinnabl
owner | RWSEM_NONSPINNABLE));
}

+static inline bool rwsem_read_trylock(struct rw_semaphore *sem)
+{
+ long cnt = atomic_long_add_return_acquire(RWSEM_READER_BIAS, &sem->count);
+
+ WARN_ON_ONCE(cnt < 0);
+ return !(cnt & RWSEM_READ_FAILED_MASK);
+}
+
/*
* Return just the real task structure pointer of the owner
*/
@@ -403,6 +427,12 @@ static void rwsem_mark_wake(struct rw_se
}

/*
+ * No reader wakeup if there are too many of them already.
+ */
+ if (unlikely(atomic_long_read(&sem->count) < 0))
+ return;
+
+ /*
* Writers might steal the lock before we grant it to the next reader.
* We prefer to do the first reader grant before counting readers
* so we can bail out early if a writer stole the lock.
@@ -949,9 +979,9 @@ static struct rw_semaphore __sched *
rwsem_down_read_slowpath(struct rw_semaphore *sem, int state)
{
long count, adjustment = -RWSEM_READER_BIAS;
- bool wake = false;
struct rwsem_waiter waiter;
DEFINE_WAKE_Q(wake_q);
+ bool wake = false;

/*
* Save the current read-owner of rwsem, if available, and the
@@ -1270,8 +1300,7 @@ static struct rw_semaphore *rwsem_downgr
*/
inline void __down_read(struct rw_semaphore *sem)
{
- if (unlikely(atomic_long_fetch_add_acquire(RWSEM_READER_BIAS,
- &sem->count) & RWSEM_READ_FAILED_MASK)) {
+ if (!rwsem_read_trylock(sem)) {
rwsem_down_read_slowpath(sem, TASK_UNINTERRUPTIBLE);
DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
} else {
@@ -1281,9 +1310,8 @@ inline void __down_read(struct rw_semaph

static inline int __down_read_killable(struct rw_semaphore *sem)
{
- if (unlikely(atomic_long_fetch_add_acquire(RWSEM_READER_BIAS,
- &sem->count) & RWSEM_READ_FAILED_MASK)) {
- if (IS_ERR(rwsem_down_read_slowpath(sem, TASK_KILLABLE)))
+ if (!rwsem_read_trylock(sem)) {
+ if (IS_ERR(rwsem_down_read_slowpath(sem, TASK_KILLABLE)))
return -EINTR;
DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
} else {
@@ -1359,6 +1387,7 @@ inline void __up_read(struct rw_semaphor
DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
rwsem_clear_reader_owned(sem);
tmp = atomic_long_add_return_release(-RWSEM_READER_BIAS, &sem->count);
+ DEBUG_RWSEMS_WARN_ON(tmp < 0, sem);
if (unlikely((tmp & (RWSEM_LOCK_MASK|RWSEM_FLAG_WAITERS)) ==
RWSEM_FLAG_WAITERS)) {
clear_wr_nonspinnable(sem);
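
For reference, a quick userspace sketch (again not kernel code; BITS stands in
for BITS_PER_LONG) of where that read fail guard bit puts the reader limit:

	#include <stdio.h>

	#define RWSEM_READER_SHIFT	8

	static void show(int bits)
	{
		unsigned long long readfail = 1ULL << (bits - 1);
		unsigned long long max_readers = readfail >> RWSEM_READER_SHIFT;

		printf("%2d-bit: guard bit is bit %d, fastpath fails at reader %llu\n",
		       bits, bits - 1, max_readers);
	}

	int main(void)
	{
		show(32);	/* reader 8388608 (2^23) trips the guard bit */
		show(64);	/* reader 36028797018963968 (2^55) trips it */
		return 0;
	}

So setting the read fail bit takes 2^23 simultaneous readers on 32-bit and
2^55 on 64-bit, which is why the comment above calls it unlikely; the fastpath
test is only there as a guard.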