[PATCH 5/8] cpu: cpu-hotplug deadlock

From: Gautham R Shenoy
Date: Tue Apr 29 2008 - 09:02:24 EST


Subject: cpu: cpu-hotplug deadlock

From: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>

cpu_hotplug.mutex is basically a lock-internal lock; but by keeping it locked
over the 'write' section (cpu_hotplug_begin/done) a lock inversion happens when
some of the write side code calls into code that would otherwise take a
read lock.

And it so happens that read-in-write recursion is expressly permitted.

Fix this by turning cpu_hotplug into a proper standalone unfair reader/writer
lock that allows reader-in-reader and reader-in-writer recursion.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
Signed-off-by: Gautham R Shenoy <ego@xxxxxxxxxx>
---

kernel/cpu.c | 97 +++++++++++++++++++++++++++++++++-------------------------
1 files changed, 55 insertions(+), 42 deletions(-)

diff --git a/kernel/cpu.c b/kernel/cpu.c
index 2eff3f6..e856c22 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -27,12 +27,13 @@ static int cpu_hotplug_disabled;

static struct {
struct task_struct *active_writer;
- struct mutex lock; /* Synchronizes accesses to refcount, */
+ spinlock_t lock; /* Synchronizes accesses to refcount, */
/*
* Also blocks the new readers during
* an ongoing cpu hotplug operation.
*/
int refcount;
+ wait_queue_head_t reader_queue;
wait_queue_head_t writer_queue;
} cpu_hotplug;

@@ -41,8 +42,9 @@ static struct {
void __init cpu_hotplug_init(void)
{
cpu_hotplug.active_writer = NULL;
- mutex_init(&cpu_hotplug.lock);
+ spin_lock_init(&cpu_hotplug.lock);
cpu_hotplug.refcount = 0;
+ init_waitqueue_head(&cpu_hotplug.reader_queue);
init_waitqueue_head(&cpu_hotplug.writer_queue);
}

@@ -51,27 +53,42 @@ void __init cpu_hotplug_init(void)
void get_online_cpus(void)
{
might_sleep();
+
+ spin_lock(&cpu_hotplug.lock);
if (cpu_hotplug.active_writer == current)
- return;
- mutex_lock(&cpu_hotplug.lock);
+ goto unlock;
+
+ if (cpu_hotplug.active_writer) {
+ DEFINE_WAIT(wait);
+
+ for (;;) {
+ prepare_to_wait(&cpu_hotplug.reader_queue, &wait,
+ TASK_UNINTERRUPTIBLE);
+ if (!cpu_hotplug.active_writer)
+ break;
+ spin_unlock(&cpu_hotplug.lock);
+ schedule();
+ spin_lock(&cpu_hotplug.lock);
+ }
+ finish_wait(&cpu_hotplug.reader_queue, &wait);
+ }
cpu_hotplug.refcount++;
- mutex_unlock(&cpu_hotplug.lock);
-
+ unlock:
+ spin_unlock(&cpu_hotplug.lock);
}
EXPORT_SYMBOL_GPL(get_online_cpus);

void put_online_cpus(void)
{
+ spin_lock(&cpu_hotplug.lock);
if (cpu_hotplug.active_writer == current)
- return;
- mutex_lock(&cpu_hotplug.lock);
- cpu_hotplug.refcount--;
+ goto unlock;

- if (unlikely(writer_exists()) && !cpu_hotplug.refcount)
+ cpu_hotplug.refcount--;
+ if (!cpu_hotplug.refcount)
wake_up(&cpu_hotplug.writer_queue);
-
- mutex_unlock(&cpu_hotplug.lock);
-
+ unlock:
+ spin_unlock(&cpu_hotplug.lock);
}
EXPORT_SYMBOL_GPL(put_online_cpus);

@@ -95,45 +112,41 @@ void cpu_maps_update_done(void)
* This ensures that the hotplug operation can begin only when the
* refcount goes to zero.
*
- * Note that during a cpu-hotplug operation, the new readers, if any,
- * will be blocked by the cpu_hotplug.lock
- *
- * Since cpu_maps_update_begin is always called after invoking
- * cpu_maps_update_begin, we can be sure that only one writer is active.
- *
- * Note that theoretically, there is a possibility of a livelock:
- * - Refcount goes to zero, last reader wakes up the sleeping
- * writer.
- * - Last reader unlocks the cpu_hotplug.lock.
- * - A new reader arrives at this moment, bumps up the refcount.
- * - The writer acquires the cpu_hotplug.lock finds the refcount
- * non zero and goes to sleep again.
- *
- * However, this is very difficult to achieve in practice since
- * get_online_cpus() not an api which is called all that often.
- *
+ * cpu_hotplug is basically an unfair recursive reader/writer lock that
+ * allows reader-in-writer recursion.
*/
static void cpu_hotplug_begin(void)
{
- DECLARE_WAITQUEUE(wait, current);
-
- mutex_lock(&cpu_hotplug.lock);
+ might_sleep();

- cpu_hotplug.active_writer = current;
- add_wait_queue_exclusive(&cpu_hotplug.writer_queue, &wait);
- while (cpu_hotplug.refcount) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- mutex_unlock(&cpu_hotplug.lock);
- schedule();
- mutex_lock(&cpu_hotplug.lock);
+ spin_lock(&cpu_hotplug.lock);
+ if (cpu_hotplug.refcount || cpu_hotplug.active_writer) {
+ DEFINE_WAIT(wait);
+
+ for (;;) {
+ prepare_to_wait(&cpu_hotplug.writer_queue, &wait,
+ TASK_UNINTERRUPTIBLE);
+ if (!cpu_hotplug.refcount && !cpu_hotplug.active_writer)
+ break;
+ spin_unlock(&cpu_hotplug.lock);
+ schedule();
+ spin_lock(&cpu_hotplug.lock);
+ }
+ finish_wait(&cpu_hotplug.writer_queue, &wait);
}
- remove_wait_queue_locked(&cpu_hotplug.writer_queue, &wait);
+ cpu_hotplug.active_writer = current;
+ spin_unlock(&cpu_hotplug.lock);
}

static void cpu_hotplug_done(void)
{
+ spin_lock(&cpu_hotplug.lock);
cpu_hotplug.active_writer = NULL;
- mutex_unlock(&cpu_hotplug.lock);
+ if (!list_empty(&cpu_hotplug.writer_queue.task_list))
+ wake_up(&cpu_hotplug.writer_queue);
+ else
+ wake_up_all(&cpu_hotplug.reader_queue);
+ spin_unlock(&cpu_hotplug.lock);
}
/* Need to know about CPUs going up/down? */
int __cpuinit register_cpu_notifier(struct notifier_block *nb)
--
Thanks and Regards
gautham
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/