[PATCH RT 6/6] read lock Priority Inheritance implementation

From: Steven Rostedt
Date: Fri Apr 25 2008 - 10:22:38 EST


This patch adds priority inheritance (PI) to the read/write locks.
When a task blocks on a lock that, somewhere down the PI chain, is
owned by readers, every reader of lower priority than the blocked
task is boosted.
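
In more detail: each rw_mutex now caches the priority of its top waiter
in a new ->prio field (reset to MAX_PRIO when there are no waiters),
rt_mutex_getprio() folds the cached priority of every read lock a task
holds into that task's priority, and rt_mutex_adjust_readers() walks the
lock's reader list and re-runs the PI chain adjustment for each reader,
bounded by MAX_RWLOCK_DEPTH.

As a rough user-space sketch of just the boosting arithmetic
(illustration only, not part of the patch; the toy_* names are
hypothetical and all of the wait_lock/pi_lock locking is ignored):

/*
 * Toy model: lower numeric value means higher priority, and
 * MAX_PRIO is the lowest possible priority, as in the kernel.
 */
#include <stdio.h>

#define MAX_PRIO 140
#define MAX_RWLOCK_DEPTH 5

struct toy_rwlock;

struct toy_task {
        const char *name;
        int normal_prio;        /* unboosted priority */
        int prio;               /* effective priority */
        struct toy_rwlock *held[MAX_RWLOCK_DEPTH]; /* read locks owned */
        int nr_held;
};

struct toy_rwlock {
        int prio;               /* prio of top waiter, MAX_PRIO if none */
        struct toy_task *readers[8];
        int nr_readers;
};

/*
 * Models rt_mutex_get_readers_prio(): a reader's effective priority
 * is capped by the highest-priority waiter of any read lock it holds.
 */
static int readers_prio(struct toy_task *t, int prio)
{
        int i;

        for (i = 0; i < t->nr_held; i++)
                if (t->held[i]->prio < prio)
                        prio = t->held[i]->prio;
        return prio;
}

/*
 * Models rt_mutex_adjust_readers(): cache the blocked task's priority
 * on the lock, then recompute every reader's effective priority. The
 * real code then re-walks each reader's own PI chain via
 * rt_mutex_adjust_prio_chain(), with the recursion depth capped.
 */
static void boost_readers(struct toy_rwlock *lock, struct toy_task *waiter)
{
        int i;

        lock->prio = waiter->prio;
        for (i = 0; i < lock->nr_readers; i++) {
                struct toy_task *r = lock->readers[i];
                r->prio = readers_prio(r, r->normal_prio);
        }
}

int main(void)
{
        struct toy_rwlock lock = { .prio = MAX_PRIO };
        struct toy_task reader = { "reader", 120, 120, { &lock }, 1 };
        struct toy_task writer = { "rt-writer", 5, 5, { 0 }, 0 };

        lock.readers[lock.nr_readers++] = &reader;

        /* the RT task blocks on the read-held lock: boost the reader */
        boost_readers(&lock, &writer);
        printf("%s: prio %d -> %d\n", reader.name, reader.normal_prio,
               reader.prio);
        return 0;
}

Running the sketch shows the reader hoisted from prio 120 to the
writer's prio 5; once the cached lock prio drops back to MAX_PRIO,
recomputing returns the reader to its normal priority.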

Signed-off-by: Steven Rostedt <srostedt@xxxxxxxxxx>
---
 include/linux/init_task.h |    8 +++
 include/linux/rt_lock.h   |    4 +
 kernel/fork.c             |    1
 kernel/rtmutex.c          |  115 ++++++++++++++++++++++++++++++++++++++++----
 4 files changed, 118 insertions(+), 10 deletions(-)

Index: linux-2.6.24.4-rt4/include/linux/rt_lock.h
===================================================================
--- linux-2.6.24.4-rt4.orig/include/linux/rt_lock.h 2008-03-25 23:13:17.000000000 -0400
+++ linux-2.6.24.4-rt4/include/linux/rt_lock.h 2008-03-25 23:14:47.000000000 -0400
@@ -13,6 +13,7 @@
#include <linux/rtmutex.h>
#include <asm/atomic.h>
#include <linux/spinlock_types.h>
+#include <linux/sched_prio.h>

#ifdef CONFIG_PREEMPT_RT
/*
@@ -66,6 +67,7 @@ struct rw_mutex {
        atomic_t count;   /* number of times held for read */
        atomic_t owners;  /* number of owners as readers */
        struct list_head readers;
+       int prio;
};

/*
@@ -98,6 +100,7 @@ typedef struct {

#define __RW_LOCK_UNLOCKED(name) (rwlock_t) \
        { .owners.mutex = __RT_SPIN_INITIALIZER(name.owners.mutex), \
+         .owners.prio = MAX_PRIO, \
          RW_DEP_MAP_INIT(name) }
#else /* !PREEMPT_RT */

@@ -196,6 +199,7 @@ extern int __bad_func_type(void);

#define __RWSEM_INITIALIZER(name) \
        { .owners.mutex = __RT_MUTEX_INITIALIZER(name.owners.mutex), \
+         .owners.prio = MAX_PRIO, \
          RW_DEP_MAP_INIT(name) }

#define DECLARE_RWSEM(lockname) \
Index: linux-2.6.24.4-rt4/kernel/rtmutex.c
===================================================================
--- linux-2.6.24.4-rt4.orig/kernel/rtmutex.c 2008-03-25 23:13:17.000000000 -0400
+++ linux-2.6.24.4-rt4/kernel/rtmutex.c 2008-03-25 23:14:47.000000000 -0400
@@ -133,6 +133,8 @@ static inline void init_lists(struct rt_
}
}

+static int rt_mutex_get_readers_prio(struct task_struct *task, int prio);
+
/*
* Calculate task priority from the waiter list priority
*
@@ -143,6 +145,8 @@ int rt_mutex_getprio(struct task_struct
{
        int prio = min(task->normal_prio, get_rcu_prio(task));

+       prio = rt_mutex_get_readers_prio(task, prio);
+
        if (likely(!task_has_pi_waiters(task)))
                return prio;

@@ -185,6 +189,11 @@ static void rt_mutex_adjust_prio(struct
 */
int max_lock_depth = 1024;

+static int rt_mutex_adjust_readers(struct rt_mutex *orig_lock,
+                                   struct rt_mutex_waiter *orig_waiter,
+                                   struct task_struct *top_task,
+                                   struct rt_mutex *lock,
+                                   int recursion_depth);
/*
 * Adjust the priority chain. Also used for deadlock detection.
 * Decreases task's usage by one - may thus free the task.
@@ -194,7 +203,8 @@ static int rt_mutex_adjust_prio_chain(st
                                      int deadlock_detect,
                                      struct rt_mutex *orig_lock,
                                      struct rt_mutex_waiter *orig_waiter,
-                                     struct task_struct *top_task)
+                                     struct task_struct *top_task,
+                                     int recursion_depth)
{
        struct rt_mutex *lock;
        struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
@@ -296,8 +306,13 @@ static int rt_mutex_adjust_prio_chain(st
        /* Grab the next task */
        task = rt_mutex_owner(lock);

-       /* Writers do not boost their readers. */
+       /*
+        * Readers are special. We may need to boost more than one owner.
+        */
        if (task == RT_RW_READER) {
+               ret = rt_mutex_adjust_readers(orig_lock, orig_waiter,
+                                             top_task, lock,
+                                             recursion_depth);
                spin_unlock_irqrestore(&lock->wait_lock, flags);
                goto out;
        }
@@ -479,9 +494,12 @@ static int task_blocks_on_rt_mutex(struc
        spin_unlock(&current->pi_lock);

        if (waiter == rt_mutex_top_waiter(lock)) {
-               /* readers are not handled */
-               if (owner == RT_RW_READER)
-                       return 0;
+               /* readers are handled differently */
+               if (owner == RT_RW_READER) {
+                       res = rt_mutex_adjust_readers(lock, waiter,
+                                                     current, lock, 0);
+                       return res;
+               }

                spin_lock(&owner->pi_lock);
                plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters);
@@ -508,7 +526,7 @@ static int task_blocks_on_rt_mutex(struc
        spin_unlock_irqrestore(&lock->wait_lock, flags);

        res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter,
-                                        current);
+                                        current, 0);

        spin_lock_irq(&lock->wait_lock);

@@ -625,7 +643,7 @@ static void remove_waiter(struct rt_mute

        spin_unlock_irqrestore(&lock->wait_lock, flags);

-       rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current);
+       rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current, 0);

        spin_lock_irq(&lock->wait_lock);
}
@@ -652,7 +670,7 @@ void rt_mutex_adjust_pi(struct task_stru
        get_task_struct(task);
        spin_unlock_irqrestore(&task->pi_lock, flags);

-       rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task);
+       rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task, 0);
}

/*
@@ -1088,7 +1106,6 @@ static int try_to_take_rw_read(struct rw
        if (rt_rwlock_pending_writer(rwm))
                return 0;
        if (rt_mutex_has_waiters(mutex)) {
-               /* readers don't do PI */
                waiter = rt_mutex_top_waiter(mutex);
                if (current->prio >= waiter->task->prio)
                        return 0;
@@ -1102,7 +1119,7 @@ static int try_to_take_rw_read(struct rw
                        spin_unlock(&mtxowner->pi_lock);
                }
        } else if (rt_mutex_has_waiters(mutex)) {
-               /* Readers don't do PI */
+               /* Readers do things differently with respect to PI */
                waiter = rt_mutex_top_waiter(mutex);
                spin_lock(&current->pi_lock);
                plist_del(&waiter->pi_list_entry, &current->pi_waiters);
@@ -1608,6 +1625,7 @@ rt_read_slowunlock(struct rw_mutex *rwm,

        /* If no one is blocked, then clear all ownership */
        if (!rt_mutex_has_waiters(mutex)) {
+               rwm->prio = MAX_PRIO;
                /*
                 * If count is not zero, we are under the limit with
                 * no other readers.
@@ -1838,11 +1856,88 @@ void rt_mutex_rwsem_init(struct rw_mutex
        rwm->owner = NULL;
        atomic_set(&rwm->count, 0);
        atomic_set(&rwm->owners, 0);
+       rwm->prio = MAX_PRIO;
        INIT_LIST_HEAD(&rwm->readers);

        __rt_mutex_init(mutex, name);
}

+static int rt_mutex_get_readers_prio(struct task_struct *task, int prio)
+{
+       struct reader_lock_struct *rls;
+       struct rw_mutex *rwm;
+       int lock_prio;
+       int i;
+
+       for (i = 0; i < task->reader_lock_count; i++) {
+               rls = &task->owned_read_locks[i];
+               rwm = rls->lock;
+               if (rwm) {
+                       lock_prio = rwm->prio;
+                       if (prio > lock_prio)
+                               prio = lock_prio;
+               }
+       }
+
+       return prio;
+}
+
+static int rt_mutex_adjust_readers(struct rt_mutex *orig_lock,
+                                   struct rt_mutex_waiter *orig_waiter,
+                                   struct task_struct *top_task,
+                                   struct rt_mutex *lock,
+                                   int recursion_depth)
+{
+       struct reader_lock_struct *rls;
+       struct rt_mutex_waiter *waiter;
+       struct task_struct *task;
+       struct rw_mutex *rwm = container_of(lock, struct rw_mutex, mutex);
+
+       if (rt_mutex_has_waiters(lock)) {
+               waiter = rt_mutex_top_waiter(lock);
+               /*
+                * Do we need to grab the task->pi_lock?
+                * Really, we are only reading it. If it
+                * changes, then that should follow this chain
+                * too.
+                */
+               rwm->prio = waiter->task->prio;
+       } else
+               rwm->prio = MAX_PRIO;
+
+       if (recursion_depth >= MAX_RWLOCK_DEPTH) {
+               WARN_ON(1);
+               return 1;
+       }
+
+       list_for_each_entry(rls, &rwm->readers, list) {
+               task = rls->task;
+               get_task_struct(task);
+               /*
+                * rt_mutex_adjust_prio_chain will do
+                * the put_task_struct
+                */
+               rt_mutex_adjust_prio_chain(task, 0, orig_lock,
+                                          orig_waiter, top_task,
+                                          recursion_depth+1);
+       }
+
+       return 0;
+}
+#else
+static int rt_mutex_adjust_readers(struct rt_mutex *orig_lock,
+                                   struct rt_mutex_waiter *orig_waiter,
+                                   struct task_struct *top_task,
+                                   struct rt_mutex *lock,
+                                   int recursion_depth)
+{
+       return 0;
+}
+
+static int rt_mutex_get_readers_prio(struct task_struct *task, int prio)
+{
+       return prio;
+}
#endif /* CONFIG_PREEMPT_RT */

#ifdef CONFIG_PREEMPT_BKL
Index: linux-2.6.24.4-rt4/include/linux/init_task.h
===================================================================
--- linux-2.6.24.4-rt4.orig/include/linux/init_task.h 2008-03-25 16:41:47.000000000 -0400
+++ linux-2.6.24.4-rt4/include/linux/init_task.h 2008-03-25 23:14:47.000000000 -0400
@@ -99,6 +99,13 @@ extern struct nsproxy init_nsproxy;
#define INIT_PREEMPT_RCU_BOOST(tsk)
#endif /* #else #ifdef CONFIG_PREEMPT_RCU_BOOST */

+#ifdef CONFIG_PREEMPT_RT
+# define INIT_RW_OWNERS(tsk) .owned_read_locks = { \
+       [0 ... (MAX_RWLOCK_DEPTH - 1) ] = { .task = &tsk } },
+#else
+# define INIT_RW_OWNERS(tsk)
+#endif
+
extern struct group_info init_groups;

#define INIT_STRUCT_PID { \
@@ -189,6 +196,7 @@ extern struct group_info init_groups;
INIT_TRACE_IRQFLAGS \
INIT_LOCKDEP \
INIT_PREEMPT_RCU_BOOST(tsk) \
+ INIT_RW_OWNERS(tsk) \
}


Index: linux-2.6.24.4-rt4/kernel/fork.c
===================================================================
--- linux-2.6.24.4-rt4.orig/kernel/fork.c 2008-03-25 23:13:17.000000000 -0400
+++ linux-2.6.24.4-rt4/kernel/fork.c 2008-03-25 23:14:47.000000000 -0400
@@ -1214,6 +1214,7 @@ static struct task_struct *copy_process(
                        INIT_LIST_HEAD(&p->owned_read_locks[i].list);
                        p->owned_read_locks[i].count = 0;
                        p->owned_read_locks[i].lock = NULL;
+                       p->owned_read_locks[i].task = p;
                }
        }
#endif
