[PATCH RT 2/6] implement rwlock management

From: Steven Rostedt
Date: Fri Apr 25 2008 - 10:21:59 EST


This patch adds the management needed for rwlocks to have multiple readers.
The rwlock_t now embeds a struct rw_mutex for its owner tracking, and the
read/write slow paths take an 'mtx' flag so that the spinlock-style rwlock
paths preserve the task state and the BKL across the wait, while the rwsem
paths keep their existing behaviour. Like the rwsems, it does not do PI
boosting on readers when a writer is blocked.
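
For reference, the fast-path idea the rw_mutex builds on can be sketched
in plain C11 outside the kernel. This is only an illustration with made-up
names (rw_mutex_sketch, *_trylock_sketch), not the kernel code; every race
it ignores is resolved in the slow paths under mutex->wait_lock in the
real implementation. The owner word is NULL when unlocked, a writer stores
its task pointer tagged with RT_RWLOCK_WRITER, and readers are counted
separately so several of them can hold the lock at once.

    /*
     * Userspace sketch only -- names and structure are assumptions made
     * for illustration, not the kernel API.
     */
    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define RT_RWLOCK_WRITER 1UL        /* low bit tags a writer owner */

    struct rw_mutex_sketch {
            _Atomic uintptr_t owner;    /* 0 == unlocked */
            atomic_int count;           /* number of readers inside */
    };

    static bool down_read_trylock_sketch(struct rw_mutex_sketch *rwm,
                                         uintptr_t me)
    {
            uintptr_t expected = 0;

            /* First reader claims the owner word; later readers just count. */
            if (atomic_compare_exchange_strong(&rwm->owner, &expected, me) ||
                !(atomic_load(&rwm->owner) & RT_RWLOCK_WRITER)) {
                    atomic_fetch_add(&rwm->count, 1);
                    return true;
            }
            return false;               /* a writer holds it: slow path */
    }

    static bool down_write_trylock_sketch(struct rw_mutex_sketch *rwm,
                                          uintptr_t me)
    {
            uintptr_t expected = 0;

            /* A writer needs the owner word exclusively and no readers. */
            return atomic_load(&rwm->count) == 0 &&
                   atomic_compare_exchange_strong(&rwm->owner, &expected,
                                                  me | RT_RWLOCK_WRITER);
    }

    int main(void)
    {
            struct rw_mutex_sketch rwm = { 0, 0 };
            uintptr_t a = 0x1000, b = 0x2000;  /* stand-ins for task ptrs */

            printf("reader A: %d\n", down_read_trylock_sketch(&rwm, a)); /* 1 */
            printf("reader B: %d\n", down_read_trylock_sketch(&rwm, b)); /* 1 */
            printf("writer B: %d\n", down_write_trylock_sketch(&rwm, b)); /* 0 */
            return 0;
    }

Tagging the writer in the owner word keeps both uncontended trylock paths
down to a single cmpxchg, which is what the fastlock/fastunlock helpers in
the patch rely on.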

Signed-off-by: Steven Rostedt <srostedt@xxxxxxxxxx>
---
include/linux/rt_lock.h | 5 -
include/linux/spinlock.h | 2
kernel/rt.c | 56 ++--------------
kernel/rtmutex.c | 158 ++++++++++++++++++++++++++++++++++-------------
kernel/rtmutex_common.h | 4 +
5 files changed, 129 insertions(+), 96 deletions(-)

Index: linux-2.6.24.4-rt4/include/linux/rt_lock.h
===================================================================
--- linux-2.6.24.4-rt4.orig/include/linux/rt_lock.h 2008-03-25 21:39:23.000000000 -0400
+++ linux-2.6.24.4-rt4/include/linux/rt_lock.h 2008-03-25 22:54:24.000000000 -0400
@@ -87,8 +87,7 @@ struct rw_semaphore {
* rwlocks - an RW semaphore plus lock-break field:
*/
typedef struct {
- struct rt_mutex lock;
- int read_depth;
+ struct rw_mutex owners;
unsigned int break_lock;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lockdep_map dep_map;
@@ -96,7 +95,7 @@ typedef struct {
} rwlock_t;

#define __RW_LOCK_UNLOCKED(name) (rwlock_t) \
- { .lock = __RT_SPIN_INITIALIZER(name), \
+ { .owners.mutex = __RT_SPIN_INITIALIZER(name.owners.mutex), \
RW_DEP_MAP_INIT(name) }
#else /* !PREEMPT_RT */

Index: linux-2.6.24.4-rt4/include/linux/spinlock.h
===================================================================
--- linux-2.6.24.4-rt4.orig/include/linux/spinlock.h 2008-03-25 16:41:48.000000000 -0400
+++ linux-2.6.24.4-rt4/include/linux/spinlock.h 2008-03-25 22:54:24.000000000 -0400
@@ -266,7 +266,7 @@ do { \

#ifdef CONFIG_PREEMPT_RT
# define rt_read_can_lock(rwl) (!rt_mutex_is_locked(&(rwl)->lock))
-# define rt_write_can_lock(rwl) (!rt_mutex_is_locked(&(rwl)->lock))
+# define rt_write_can_lock(rwl) ((rwl)->owners.owner == NULL)
#else
extern int rt_rwlock_can_lock_never_call_on_non_rt(rwlock_t *rwlock);
# define rt_read_can_lock(rwl) rt_rwlock_can_lock_never_call_on_non_rt(rwl)
Index: linux-2.6.24.4-rt4/kernel/rt.c
===================================================================
--- linux-2.6.24.4-rt4.orig/kernel/rt.c 2008-03-25 21:38:23.000000000 -0400
+++ linux-2.6.24.4-rt4/kernel/rt.c 2008-03-25 22:54:24.000000000 -0400
@@ -165,7 +165,7 @@ EXPORT_SYMBOL(_mutex_unlock);
*/
int __lockfunc rt_write_trylock(rwlock_t *rwlock)
{
- int ret = rt_mutex_trylock(&rwlock->lock);
+ int ret = rt_mutex_down_write_trylock(&rwlock->owners);

if (ret)
rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
@@ -183,23 +183,9 @@ EXPORT_SYMBOL(rt_write_trylock_irqsave);

int __lockfunc rt_read_trylock(rwlock_t *rwlock)
{
- struct rt_mutex *lock = &rwlock->lock;
- unsigned long flags;
int ret;

- /*
- * Read locks within the self-held write lock succeed.
- */
- spin_lock_irqsave(&lock->wait_lock, flags);
- if (rt_mutex_real_owner(lock) == current) {
- spin_unlock_irqrestore(&lock->wait_lock, flags);
- rwlock->read_depth++;
- rwlock_acquire_read(&rwlock->dep_map, 0, 1, _RET_IP_);
- return 1;
- }
- spin_unlock_irqrestore(&lock->wait_lock, flags);
-
- ret = rt_mutex_trylock(lock);
+ ret = rt_mutex_down_read_trylock(&rwlock->owners);
if (ret)
rwlock_acquire_read(&rwlock->dep_map, 0, 1, _RET_IP_);

@@ -210,27 +196,14 @@ EXPORT_SYMBOL(rt_read_trylock);
void __lockfunc rt_write_lock(rwlock_t *rwlock)
{
rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
- LOCK_CONTENDED_RT(rwlock, rt_mutex_trylock, __rt_spin_lock);
+ LOCK_CONTENDED_RT_RW(rwlock, rt_mutex_down_write_trylock, rt_rwlock_write_lock);
}
EXPORT_SYMBOL(rt_write_lock);

void __lockfunc rt_read_lock(rwlock_t *rwlock)
{
- unsigned long flags;
- struct rt_mutex *lock = &rwlock->lock;
-
rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_);
- /*
- * Read locks within the write lock succeed.
- */
- spin_lock_irqsave(&lock->wait_lock, flags);
- if (rt_mutex_real_owner(lock) == current) {
- spin_unlock_irqrestore(&lock->wait_lock, flags);
- rwlock->read_depth++;
- return;
- }
- spin_unlock_irqrestore(&lock->wait_lock, flags);
- LOCK_CONTENDED_RT(rwlock, rt_mutex_trylock, __rt_spin_lock);
+ LOCK_CONTENDED_RT_RW(rwlock, rt_mutex_down_read_trylock, rt_rwlock_read_lock);
}

EXPORT_SYMBOL(rt_read_lock);
@@ -239,28 +212,14 @@ void __lockfunc rt_write_unlock(rwlock_t
{
/* NOTE: we always pass in '1' for nested, for simplicity */
rwlock_release(&rwlock->dep_map, 1, _RET_IP_);
- __rt_spin_unlock(&rwlock->lock);
+ rt_rwlock_write_unlock(&rwlock->owners);
}
EXPORT_SYMBOL(rt_write_unlock);

void __lockfunc rt_read_unlock(rwlock_t *rwlock)
{
- struct rt_mutex *lock = &rwlock->lock;
- unsigned long flags;
-
rwlock_release(&rwlock->dep_map, 1, _RET_IP_);
- // TRACE_WARN_ON(lock->save_state != 1);
- /*
- * Read locks within the self-held write lock succeed.
- */
- spin_lock_irqsave(&lock->wait_lock, flags);
- if (rt_mutex_real_owner(lock) == current && rwlock->read_depth) {
- spin_unlock_irqrestore(&lock->wait_lock, flags);
- rwlock->read_depth--;
- return;
- }
- spin_unlock_irqrestore(&lock->wait_lock, flags);
- __rt_spin_unlock(&rwlock->lock);
+ rt_rwlock_read_unlock(&rwlock->owners);
}
EXPORT_SYMBOL(rt_read_unlock);

@@ -289,8 +248,7 @@ void __rt_rwlock_init(rwlock_t *rwlock,
debug_check_no_locks_freed((void *)rwlock, sizeof(*rwlock));
lockdep_init_map(&rwlock->dep_map, name, key, 0);
#endif
- __rt_mutex_init(&rwlock->lock, name);
- rwlock->read_depth = 0;
+ rt_mutex_rwsem_init(&rwlock->owners, name);
}
EXPORT_SYMBOL(__rt_rwlock_init);

Index: linux-2.6.24.4-rt4/kernel/rtmutex.c
===================================================================
--- linux-2.6.24.4-rt4.orig/kernel/rtmutex.c 2008-03-25 22:39:14.000000000 -0400
+++ linux-2.6.24.4-rt4/kernel/rtmutex.c 2008-03-25 22:54:24.000000000 -0400
@@ -1072,12 +1072,12 @@ try_to_take_rw_write(struct rw_mutex *rw
}

static void
-rt_read_slowlock(struct rw_mutex *rwm)
+rt_read_slowlock(struct rw_mutex *rwm, int mtx)
{
struct rt_mutex_waiter waiter;
struct rt_mutex *mutex = &rwm->mutex;
int saved_lock_depth = -1;
- unsigned long flags;
+ unsigned long saved_state = -1, state, flags;

spin_lock_irqsave(&mutex->wait_lock, flags);
init_lists(mutex);
@@ -1096,13 +1096,19 @@ rt_read_slowlock(struct rw_mutex *rwm)

init_lists(mutex);

- /*
- * We drop the BKL here before we go into the wait loop to avoid a
- * possible deadlock in the scheduler.
- */
- if (unlikely(current->lock_depth >= 0))
- saved_lock_depth = rt_release_bkl(mutex, flags);
- set_current_state(TASK_UNINTERRUPTIBLE);
+ if (mtx) {
+ /*
+ * We drop the BKL here before we go into the wait loop to avoid a
+ * possible deadlock in the scheduler.
+ */
+ if (unlikely(current->lock_depth >= 0))
+ saved_lock_depth = rt_release_bkl(mutex, flags);
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ } else {
+ /* Spin lock must preserve BKL */
+ saved_state = xchg(&current->state, TASK_UNINTERRUPTIBLE);
+ saved_lock_depth = current->lock_depth;
+ }

for (;;) {
unsigned long saved_flags;
@@ -1125,21 +1131,36 @@ rt_read_slowlock(struct rw_mutex *rwm)
}
saved_flags = current->flags & PF_NOSCHED;
current->flags &= ~PF_NOSCHED;
+ if (!mtx)
+ current->lock_depth = -1;

spin_unlock_irqrestore(&mutex->wait_lock, flags);

debug_rt_mutex_print_deadlock(&waiter);

- if (waiter.task)
+ if (!mtx || waiter.task)
schedule_rt_mutex(mutex);

spin_lock_irqsave(&mutex->wait_lock, flags);

current->flags |= saved_flags;
- set_current_state(TASK_UNINTERRUPTIBLE);
+ if (mtx)
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ else {
+ current->lock_depth = saved_lock_depth;
+ state = xchg(&current->state, TASK_UNINTERRUPTIBLE);
+ if (unlikely(state == TASK_RUNNING))
+ saved_state = TASK_RUNNING;
+ }
}

- set_current_state(TASK_RUNNING);
+ if (mtx)
+ set_current_state(TASK_RUNNING);
+ else {
+ state = xchg(&current->state, saved_state);
+ if (unlikely(state == TASK_RUNNING))
+ current->state = TASK_RUNNING;
+ }

if (unlikely(waiter.task))
remove_waiter(mutex, &waiter, flags);
@@ -1152,7 +1173,7 @@ rt_read_slowlock(struct rw_mutex *rwm)
spin_unlock_irqrestore(&mutex->wait_lock, flags);

/* Must we reaquire the BKL? */
- if (unlikely(saved_lock_depth >= 0))
+ if (mtx && unlikely(saved_lock_depth >= 0))
rt_reacquire_bkl(saved_lock_depth);

debug_rt_mutex_free_waiter(&waiter);
@@ -1160,7 +1181,8 @@ rt_read_slowlock(struct rw_mutex *rwm)

static inline void
rt_read_fastlock(struct rw_mutex *rwm,
- void fastcall (*slowfn)(struct rw_mutex *rwm))
+ void fastcall (*slowfn)(struct rw_mutex *rwm, int mtx),
+ int mtx)
{
retry:
if (likely(rt_rwlock_cmpxchg(rwm, NULL, current))) {
@@ -1176,12 +1198,17 @@ retry:
goto retry;
}
} else
- slowfn(rwm);
+ slowfn(rwm, mtx);
}

void fastcall rt_mutex_down_read(struct rw_mutex *rwm)
{
- rt_read_fastlock(rwm, rt_read_slowlock);
+ rt_read_fastlock(rwm, rt_read_slowlock, 1);
+}
+
+void fastcall rt_rwlock_read_lock(struct rw_mutex *rwm)
+{
+ rt_read_fastlock(rwm, rt_read_slowlock, 0);
}


@@ -1231,12 +1258,12 @@ int __sched rt_mutex_down_read_trylock(s
}

static void
-rt_write_slowlock(struct rw_mutex *rwm)
+rt_write_slowlock(struct rw_mutex *rwm, int mtx)
{
struct rt_mutex *mutex = &rwm->mutex;
struct rt_mutex_waiter waiter;
int saved_lock_depth = -1;
- unsigned long flags;
+ unsigned long flags, saved_state = -1, state;

debug_rt_mutex_init_waiter(&waiter);
waiter.task = NULL;
@@ -1253,13 +1280,19 @@ rt_write_slowlock(struct rw_mutex *rwm)
}
update_rw_mutex_owner(rwm);

- /*
- * We drop the BKL here before we go into the wait loop to avoid a
- * possible deadlock in the scheduler.
- */
- if (unlikely(current->lock_depth >= 0))
- saved_lock_depth = rt_release_bkl(mutex, flags);
- set_current_state(TASK_UNINTERRUPTIBLE);
+ if (mtx) {
+ /*
+ * We drop the BKL here before we go into the wait loop to avoid a
+ * possible deadlock in the scheduler.
+ */
+ if (unlikely(current->lock_depth >= 0))
+ saved_lock_depth = rt_release_bkl(mutex, flags);
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ } else {
+ /* Spin locks must preserve the BKL */
+ saved_lock_depth = current->lock_depth;
+ saved_state = xchg(&current->state, TASK_UNINTERRUPTIBLE);
+ }

for (;;) {
unsigned long saved_flags;
@@ -1282,21 +1315,36 @@ rt_write_slowlock(struct rw_mutex *rwm)
}
saved_flags = current->flags & PF_NOSCHED;
current->flags &= ~PF_NOSCHED;
+ if (!mtx)
+ current->lock_depth = -1;

spin_unlock_irqrestore(&mutex->wait_lock, flags);

debug_rt_mutex_print_deadlock(&waiter);

- if (waiter.task)
+ if (!mtx || waiter.task)
schedule_rt_mutex(mutex);

spin_lock_irqsave(&mutex->wait_lock, flags);

current->flags |= saved_flags;
- set_current_state(TASK_UNINTERRUPTIBLE);
+ if (mtx)
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ else {
+ current->lock_depth = saved_lock_depth;
+ state = xchg(&current->state, TASK_UNINTERRUPTIBLE);
+ if (unlikely(state == TASK_RUNNING))
+ saved_state = TASK_RUNNING;
+ }
}

- set_current_state(TASK_RUNNING);
+ if (mtx)
+ set_current_state(TASK_RUNNING);
+ else {
+ state = xchg(&current->state, saved_state);
+ if (unlikely(state == TASK_RUNNING))
+ current->state = TASK_RUNNING;
+ }

if (unlikely(waiter.task))
remove_waiter(mutex, &waiter, flags);
@@ -1308,7 +1356,7 @@ rt_write_slowlock(struct rw_mutex *rwm)
spin_unlock_irqrestore(&mutex->wait_lock, flags);

/* Must we reaquire the BKL? */
- if (unlikely(saved_lock_depth >= 0))
+ if (mtx && unlikely(saved_lock_depth >= 0))
rt_reacquire_bkl(saved_lock_depth);

WARN_ON(atomic_read(&rwm->count));
@@ -1319,7 +1367,8 @@ rt_write_slowlock(struct rw_mutex *rwm)

static inline void
rt_write_fastlock(struct rw_mutex *rwm,
- void fastcall (*slowfn)(struct rw_mutex *rwm))
+ void fastcall (*slowfn)(struct rw_mutex *rwm, int mtx),
+ int mtx)
{
unsigned long val = (unsigned long)current | RT_RWLOCK_WRITER;

@@ -1327,12 +1376,17 @@ rt_write_fastlock(struct rw_mutex *rwm,
rt_mutex_deadlock_account_lock(&rwm->mutex, current);
WARN_ON(atomic_read(&rwm->count));
} else
- slowfn(rwm);
+ slowfn(rwm, mtx);
}

void fastcall rt_mutex_down_write(struct rw_mutex *rwm)
{
- rt_write_fastlock(rwm, rt_write_slowlock);
+ rt_write_fastlock(rwm, rt_write_slowlock, 1);
+}
+
+void fastcall rt_rwlock_write_lock(struct rw_mutex *rwm)
+{
+ rt_write_fastlock(rwm, rt_write_slowlock, 0);
}

static int
@@ -1373,10 +1427,11 @@ int fastcall rt_mutex_down_write_trylock
}

static void fastcall noinline __sched
-rt_read_slowunlock(struct rw_mutex *rwm)
+rt_read_slowunlock(struct rw_mutex *rwm, int mtx)
{
struct rt_mutex *mutex = &rwm->mutex;
unsigned long flags;
+ int savestate = !mtx;
struct rt_mutex_waiter *waiter;

spin_lock_irqsave(&mutex->wait_lock, flags);
@@ -1436,7 +1491,7 @@ rt_read_slowunlock(struct rw_mutex *rwm)
* will steal the lock from the reader. This is the
* only time we can have a reader pending on a lock.
*/
- wakeup_next_waiter(mutex, 0);
+ wakeup_next_waiter(mutex, savestate);

out:
spin_unlock_irqrestore(&mutex->wait_lock, flags);
@@ -1447,7 +1502,8 @@ rt_read_slowunlock(struct rw_mutex *rwm)

static inline void
rt_read_fastunlock(struct rw_mutex *rwm,
- void fastcall (*slowfn)(struct rw_mutex *rwm))
+ void fastcall (*slowfn)(struct rw_mutex *rwm, int mtx),
+ int mtx)
{
WARN_ON(!atomic_read(&rwm->count));
WARN_ON(!rwm->owner);
@@ -1455,20 +1511,26 @@ rt_read_fastunlock(struct rw_mutex *rwm,
if (likely(rt_rwlock_cmpxchg(rwm, current, NULL)))
rt_mutex_deadlock_account_unlock(current);
else
- slowfn(rwm);
+ slowfn(rwm, mtx);
}

void fastcall rt_mutex_up_read(struct rw_mutex *rwm)
{
- rt_read_fastunlock(rwm, rt_read_slowunlock);
+ rt_read_fastunlock(rwm, rt_read_slowunlock, 1);
+}
+
+void fastcall rt_rwlock_read_unlock(struct rw_mutex *rwm)
+{
+ rt_read_fastunlock(rwm, rt_read_slowunlock, 0);
}

static void fastcall noinline __sched
-rt_write_slowunlock(struct rw_mutex *rwm)
+rt_write_slowunlock(struct rw_mutex *rwm, int mtx)
{
struct rt_mutex *mutex = &rwm->mutex;
struct rt_mutex_waiter *waiter;
struct task_struct *pendowner;
+ int savestate = !mtx;
unsigned long flags;

spin_lock_irqsave(&mutex->wait_lock, flags);
@@ -1499,7 +1561,7 @@ rt_write_slowunlock(struct rw_mutex *rwm

waiter = rt_mutex_top_waiter(mutex);
pendowner = waiter->task;
- wakeup_next_waiter(mutex, 0);
+ wakeup_next_waiter(mutex, savestate);

/* another writer is next? */
if (waiter->write_lock) {
@@ -1535,7 +1597,10 @@ rt_write_slowunlock(struct rw_mutex *rwm
waiter->task = NULL;
reader->pi_blocked_on = NULL;

- wake_up_process(reader);
+ if (savestate)
+ wake_up_process_mutex(reader);
+ else
+ wake_up_process(reader);

if (rt_mutex_has_waiters(mutex))
waiter = rt_mutex_top_waiter(mutex);
@@ -1565,7 +1630,9 @@ rt_write_slowunlock(struct rw_mutex *rwm

static inline void
rt_write_fastunlock(struct rw_mutex *rwm,
- void fastcall (*slowfn)(struct rw_mutex *rwm))
+ void fastcall (*slowfn)(struct rw_mutex *rwm,
+ int mtx),
+ int mtx)
{
unsigned long val = (unsigned long)current | RT_RWLOCK_WRITER;

@@ -1573,12 +1640,17 @@ rt_write_fastunlock(struct rw_mutex *rwm
if (likely(rt_rwlock_cmpxchg(rwm, (struct task_struct *)val, NULL)))
rt_mutex_deadlock_account_unlock(current);
else
- slowfn(rwm);
+ slowfn(rwm, mtx);
}

void fastcall rt_mutex_up_write(struct rw_mutex *rwm)
{
- rt_write_fastunlock(rwm, rt_write_slowunlock);
+ rt_write_fastunlock(rwm, rt_write_slowunlock, 1);
+}
+
+void fastcall rt_rwlock_write_unlock(struct rw_mutex *rwm)
+{
+ rt_write_fastunlock(rwm, rt_write_slowunlock, 0);
}

void rt_mutex_rwsem_init(struct rw_mutex *rwm, const char *name)
Index: linux-2.6.24.4-rt4/kernel/rtmutex_common.h
===================================================================
--- linux-2.6.24.4-rt4.orig/kernel/rtmutex_common.h 2008-03-25 21:45:43.000000000 -0400
+++ linux-2.6.24.4-rt4/kernel/rtmutex_common.h 2008-03-25 22:54:24.000000000 -0400
@@ -166,6 +166,10 @@ extern void rt_mutex_down_write(struct r
extern int rt_mutex_down_read_trylock(struct rw_mutex *rwm);
extern void rt_mutex_down_read(struct rw_mutex *rwm);
extern void rt_mutex_rwsem_init(struct rw_mutex *rwm, const char *name);
+extern void rt_rwlock_write_lock(struct rw_mutex *rwm);
+extern void rt_rwlock_read_lock(struct rw_mutex *rwm);
+extern void rt_rwlock_write_unlock(struct rw_mutex *rwm);
+extern void rt_rwlock_read_unlock(struct rw_mutex *rwm);

#endif /* CONFIG_PREEMPT_RT */

