[PATCH 07/12] rwsem: wake queued readers when writer blocks on active read lock

From: Michel Lespinasse
Date: Tue May 11 2010 - 23:22:37 EST


This change addresses the following situation:

- Thread A acquires the rwsem for read
- Thread B tries to acquire the rwsem for write, notices there is already
an active owner for the rwsem.
- Thread C tries to acquire the rwsem for read, notices that thread B has
already tried to acquire it for write.
- Thread C grabs the spinlock and queues itself on the wait queue.
- Thread B grabs the spinlock and queues itself behind C. At this point A is
the only remaining active owner on the rwsem.

In this situation, thread B could notice that it was the last active writer
on the rwsem, and decide to wake C so that C can proceed in parallel with A,
since A and C both only want the rwsem for read.

Signed-off-by: Michel Lespinasse <walken@xxxxxxxxxx>
---
lib/rwsem.c | 32 ++++++++++++++++++++++----------
1 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/lib/rwsem.c b/lib/rwsem.c
index 9d0899b..84bbc55 100644
--- a/lib/rwsem.c
+++ b/lib/rwsem.c
@@ -36,6 +36,10 @@ struct rwsem_waiter {
#define RWSEM_WAITING_FOR_WRITE 0x00000002
};

+#define RWSEM_WAKE_ANY 0 /* Wake whatever's at head of wait list */
+#define RWSEM_WAKE_READERS 1 /* Sem is read owned by other thread */
+#define RWSEM_WAKE_READ_OWNED 2 /* Sem is read owned by caller thread */
+
/*
* handle the lock release when processes blocked on it that can now run
* - if we come here from up_xxxx(), then:
@@ -46,8 +50,8 @@ struct rwsem_waiter {
* - woken process blocks are discarded from the list after having task zeroed
* - writers are only woken if downgrading is false
*/
-static inline struct rw_semaphore *
-__rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
+static struct rw_semaphore *
+__rwsem_do_wake(struct rw_semaphore *sem, int wake_type)
{
struct rwsem_waiter *waiter;
struct task_struct *tsk;
@@ -58,9 +62,9 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE))
goto readers_only;

- if (downgrading)
- /* Caller's lock is still active, so we can't possibly
- * succeed waking writers.
+ if (wake_type != RWSEM_WAKE_ANY)
+ /* Another active reader was observed, so wakeup is not
+ * likely to succeed. Save the atomic op.
*/
goto out;

@@ -115,7 +119,8 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)

retry_readers:
oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
- if (!downgrading && (oldcount < RWSEM_WAITING_BIAS))
+ if (wake_type != RWSEM_WAKE_READ_OWNED &&
+ oldcount < RWSEM_WAITING_BIAS)
/* Someone grabbed the sem for write already */
goto undo_readers;

@@ -172,9 +177,16 @@ rwsem_down_failed_common(struct rw_semaphore *sem,
/* we're now waiting on the lock, but no longer actively read-locking */
count = rwsem_atomic_update(adjustment, sem);

- /* if there are no active locks, wake the front queued process(es) up */
+ /* if there are no active locks, wake the front queued process(es) up.
+ *
+ * or if we're called from a failed down_write(), and there were
+ * already threads queued before us, and there are no active writers,
+ * the lock must be read owned; try to wake any read locks that were
+ * queued ahead of us. */
if (!(count & RWSEM_ACTIVE_MASK))
- sem = __rwsem_do_wake(sem, 0);
+ sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);
+ else if (adjustment > 0 && count > RWSEM_WAITING_BIAS)
+ sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS);

spin_unlock_irq(&sem->wait_lock);

@@ -230,7 +242,7 @@ asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)

/* do nothing if list empty */
if (!list_empty(&sem->wait_list))
- sem = __rwsem_do_wake(sem, 0);
+ sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);

spin_unlock_irqrestore(&sem->wait_lock, flags);

@@ -250,7 +262,7 @@ asmregparm struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)

/* do nothing if list empty */
if (!list_empty(&sem->wait_list))
- sem = __rwsem_do_wake(sem, 1);
+ sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);

spin_unlock_irqrestore(&sem->wait_lock, flags);

--
1.7.0.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/