[RFC][PATCH 1/5] r/o bind mounts: change spinlock to mutex

From: Dave Hansen
Date: Tue Apr 29 2008 - 15:00:43 EST



In order to lock out new writers during remount operations, we need
to hold every per-cpu cpu_writer->lock at the same time. But a remount
can sleep, so we cannot use spinlocks for this. Make the locks mutexes.

The extra fiddling goes in mnt_drop_write() because that path used to
rely on spin_lock() disabling preemption, which kept the task on one
CPU and bounded how far __mnt_writers could underflow. Since a mutex
does not disable preemption, we have to bound the underflow another way.

Signed-off-by: Dave Hansen <dave@xxxxxxxxxxxxxxxxxx>
---

linux-2.6.git-dave/fs/namespace.c | 52 +++++++++++++++++++++-----------------
1 file changed, 29 insertions(+), 23 deletions(-)
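
(Not part of the patch, just for discussion: the pattern this converts to
can be sketched in userspace roughly as below. The names -- writer_slot,
want_write(), lock_all_writers() -- are made up for the sketch and are not
the fs/namespace.c names.)

/*
 * Userspace sketch of the per-cpu writer bookkeeping after this patch:
 * each slot pairs a mutex with a count of outstanding writers, and a
 * "remount" takes every slot's mutex (in order) to keep new writers
 * out.  Taking a pthread mutex may sleep, just like the kernel mutex.
 */
#include <pthread.h>
#include <stdio.h>

#define NSLOTS	4			/* stands in for NR_CPUS */

struct writer_slot {
	pthread_mutex_t lock;
	unsigned long count;
};

static struct writer_slot slots[NSLOTS];

/* rough analogue of mnt_want_write() */
static int want_write(int slot, int readonly)
{
	int ret = 0;

	pthread_mutex_lock(&slots[slot].lock);
	if (readonly)
		ret = -1;		/* -EROFS in the real code */
	else
		slots[slot].count++;
	pthread_mutex_unlock(&slots[slot].lock);
	return ret;
}

/* rough analogue of mnt_drop_write(), fast path only */
static void drop_write(int slot)
{
	pthread_mutex_lock(&slots[slot].lock);
	if (slots[slot].count > 0)
		slots[slot].count--;
	pthread_mutex_unlock(&slots[slot].lock);
}

/* rough analogue of lock_mnt_writers(): block all new writers */
static void lock_all_writers(void)
{
	for (int i = 0; i < NSLOTS; i++)
		pthread_mutex_lock(&slots[i].lock);
}

static void unlock_all_writers(void)
{
	for (int i = NSLOTS - 1; i >= 0; i--)
		pthread_mutex_unlock(&slots[i].lock);
}

int main(void)
{
	for (int i = 0; i < NSLOTS; i++)
		pthread_mutex_init(&slots[i].lock, NULL);

	want_write(0, 0);
	lock_all_writers();	/* the "remount" window: may sleep here */
	unlock_all_writers();
	drop_write(0);
	printf("slot 0 count: %lu\n", slots[0].count);
	return 0;
}

The only behavioral point is that lock_all_writers() may sleep while
holding every lock, which is exactly what the remount path needs and
what spinlocks forbid.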

diff -puN fs/namespace.c~make-cpu_writer-lock-a-mutex fs/namespace.c
--- linux-2.6.git/fs/namespace.c~make-cpu_writer-lock-a-mutex 2008-04-29 11:56:39.000000000 -0700
+++ linux-2.6.git-dave/fs/namespace.c 2008-04-29 11:56:39.000000000 -0700
@@ -173,7 +173,7 @@ struct mnt_writer {
* If holding multiple instances of this lock, they
* must be ordered by cpu number.
*/
- spinlock_t lock;
+ struct mutex lock;
struct lock_class_key lock_class; /* compiles out with !lockdep */
unsigned long count;
struct vfsmount *mnt;
@@ -185,7 +185,7 @@ static int __init init_mnt_writers(void)
int cpu;
for_each_possible_cpu(cpu) {
struct mnt_writer *writer = &per_cpu(mnt_writers, cpu);
- spin_lock_init(&writer->lock);
+ mutex_init(&writer->lock);
lockdep_set_class(&writer->lock, &writer->lock_class);
writer->count = 0;
}
@@ -200,7 +200,7 @@ static void unlock_mnt_writers(void)

for_each_possible_cpu(cpu) {
cpu_writer = &per_cpu(mnt_writers, cpu);
- spin_unlock(&cpu_writer->lock);
+ mutex_unlock(&cpu_writer->lock);
}
}

@@ -252,8 +252,8 @@ int mnt_want_write(struct vfsmount *mnt)
int ret = 0;
struct mnt_writer *cpu_writer;

- cpu_writer = &get_cpu_var(mnt_writers);
- spin_lock(&cpu_writer->lock);
+ cpu_writer = &__get_cpu_var(mnt_writers);
+ mutex_lock(&cpu_writer->lock);
if (__mnt_is_readonly(mnt)) {
ret = -EROFS;
goto out;
@@ -261,8 +261,7 @@ int mnt_want_write(struct vfsmount *mnt)
use_cpu_writer_for_mount(cpu_writer, mnt);
cpu_writer->count++;
out:
- spin_unlock(&cpu_writer->lock);
- put_cpu_var(mnt_writers);
+ mutex_unlock(&cpu_writer->lock);
return ret;
}
EXPORT_SYMBOL_GPL(mnt_want_write);
@@ -274,7 +273,7 @@ static void lock_mnt_writers(void)

for_each_possible_cpu(cpu) {
cpu_writer = &per_cpu(mnt_writers, cpu);
- spin_lock(&cpu_writer->lock);
+ mutex_lock(&cpu_writer->lock);
__clear_mnt_count(cpu_writer);
cpu_writer->mnt = NULL;
}
@@ -333,18 +332,34 @@ void mnt_drop_write(struct vfsmount *mnt
int must_check_underflow = 0;
struct mnt_writer *cpu_writer;

- cpu_writer = &get_cpu_var(mnt_writers);
- spin_lock(&cpu_writer->lock);
+retry:
+ cpu_writer = &__get_cpu_var(mnt_writers);
+ mutex_lock(&cpu_writer->lock);

use_cpu_writer_for_mount(cpu_writer, mnt);
if (cpu_writer->count > 0) {
cpu_writer->count--;
} else {
- must_check_underflow = 1;
+ /*
+ * Without this check, it is theoretically
+ * possible for large number of processes
+ * to atomic_dec(__mnt_writers) and cause
+ * it to underflow. Because we are under
+ * the mutex (and there are NR_CPUS
+ * mutexes), this limits the number of
+ * concurrent decrements and underflow
+ * level to NR_CPUS.
+ */
+ if (atomic_read(&mnt->__mnt_writers) <
+ MNT_WRITER_UNDERFLOW_LIMIT) {
+ mutex_unlock(&cpu_writer->lock);
+ goto retry;
+ }
atomic_dec(&mnt->__mnt_writers);
+ must_check_underflow = 1;
}

- spin_unlock(&cpu_writer->lock);
+ mutex_unlock(&cpu_writer->lock);
/*
* Logically, we could call this each time,
* but the __mnt_writers cacheline tends to
@@ -352,15 +367,6 @@ void mnt_drop_write(struct vfsmount *mnt
*/
if (must_check_underflow)
handle_write_count_underflow(mnt);
- /*
- * This could be done right after the spinlock
- * is taken because the spinlock keeps us on
- * the cpu, and disables preemption. However,
- * putting it here bounds the amount that
- * __mnt_writers can underflow. Without it,
- * we could theoretically wrap __mnt_writers.
- */
- put_cpu_var(mnt_writers);
}
EXPORT_SYMBOL_GPL(mnt_drop_write);

@@ -615,7 +621,7 @@ static inline void __mntput(struct vfsmo
struct mnt_writer *cpu_writer = &per_cpu(mnt_writers, cpu);
if (cpu_writer->mnt != mnt)
continue;
- spin_lock(&cpu_writer->lock);
+ mutex_lock(&cpu_writer->lock);
atomic_add(cpu_writer->count, &mnt->__mnt_writers);
cpu_writer->count = 0;
/*
@@ -624,7 +630,7 @@ static inline void __mntput(struct vfsmo
* it to be valid.
*/
cpu_writer->mnt = NULL;
- spin_unlock(&cpu_writer->lock);
+ mutex_unlock(&cpu_writer->lock);
}
/*
* This probably indicates that somebody messed
_
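
(Again illustration only, with made-up names: why the retry loop added to
mnt_drop_write() bounds the underflow of the shared counter.  UNDERFLOW_LIMIT
is an arbitrary stand-in for MNT_WRITER_UNDERFLOW_LIMIT, and drop_write_slow()
corresponds only to the count == 0 branch of the real function.)

/*
 * A task whose per-slot count is already zero has to decrement the
 * shared counter directly.  It does the limit check and the decrement
 * while holding one of the NSLOTS mutexes, so at most NSLOTS tasks can
 * sit between the check and the decrement at any time, which bounds
 * how far below UNDERFLOW_LIMIT the counter can go.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NSLOTS		4		/* stands in for NR_CPUS */
#define UNDERFLOW_LIMIT	(-1024)		/* made-up value for the sketch */

static pthread_mutex_t slot_lock[NSLOTS] = {
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
};
static atomic_long shared_writers;	/* plays the role of mnt->__mnt_writers */

static void drop_write_slow(int slot)
{
	for (;;) {
		pthread_mutex_lock(&slot_lock[slot]);
		if (atomic_load(&shared_writers) >= UNDERFLOW_LIMIT) {
			atomic_fetch_sub(&shared_writers, 1);
			pthread_mutex_unlock(&slot_lock[slot]);
			break;
		}
		/* counter already below the limit: back off and retry */
		pthread_mutex_unlock(&slot_lock[slot]);
	}
	/* the real code would now call handle_write_count_underflow() */
}

int main(void)
{
	drop_write_slow(0);
	printf("shared_writers: %ld\n", atomic_load(&shared_writers));
	return 0;
}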