To lock out new writers during remount operations, we need to hold all of the per-cpu cpu_writer->locks.  But remounts can sleep, so we cannot use spinlocks there.  Make them mutexes.

We also rework mnt_drop_write(), which used to rely on spin_lock() disabling preemption.  Since a mutex does not do that, we bound the underflow of __mnt_writers another way: re-check the counter against MNT_WRITER_UNDERFLOW_LIMIT under the per-cpu mutex and retry rather than decrement further, which limits the worst-case underflow to NR_CPUS.
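For illustration only (not part of the patch): a minimal userspace sketch of the same bounded-underflow pattern.  NLOCKS, UNDERFLOW_LIMIT, drop_write() and the pthread/stdatomic plumbing are stand-ins of mine for NR_CPUS, MNT_WRITER_UNDERFLOW_LIMIT, mnt_drop_write() and the per-cpu mutexes; nothing here is kernel API.  Builds with gcc -std=c11 -pthread.

/*
 * bounded_underflow.c: userspace sketch, illustrative only.
 *
 * A shared counter may only be decremented while holding one of
 * NLOCKS mutexes, and only after re-checking the limit under that
 * mutex.  At most NLOCKS threads can sit between the check and the
 * decrement, so the counter can never fall below
 * UNDERFLOW_LIMIT - NLOCKS.
 */
#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdio.h>

#define NLOCKS		4	/* stand-in for NR_CPUS */
#define UNDERFLOW_LIMIT	(-8)	/* stand-in for MNT_WRITER_UNDERFLOW_LIMIT */

static pthread_mutex_t locks[NLOCKS] = {
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
};
static atomic_int writers;	/* stand-in for mnt->__mnt_writers */

/* Mirrors the retry loop the patch adds to mnt_drop_write(). */
static void drop_write(int slot)
{
retry:
	pthread_mutex_lock(&locks[slot]);
	if (atomic_load(&writers) < UNDERFLOW_LIMIT) {
		/*
		 * Counter is already past the limit: back off and
		 * retry, giving whatever plays the role of
		 * handle_write_count_underflow() a chance to pull
		 * the counter back up before we push it lower.
		 */
		pthread_mutex_unlock(&locks[slot]);
		sched_yield();
		goto retry;
	}
	atomic_fetch_sub(&writers, 1);
	pthread_mutex_unlock(&locks[slot]);
}

int main(void)
{
	/* Single-threaded demo: five decrements, all above the limit. */
	for (int i = 0; i < 5; i++)
		drop_write(i % NLOCKS);
	printf("writers = %d\n", atomic_load(&writers));
	return 0;
}

The point of the pattern: the check and the decrement are not atomic together, but because every decrementer must hold one of the NLOCKS mutexes, at most NLOCKS threads can be past the check at once, so the counter is bounded below by UNDERFLOW_LIMIT - NLOCKS.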
Signed-off-by: Dave Hansen <dave@xxxxxxxxxxxxxxxxxx>
---

 linux-2.6.git-dave/fs/namespace.c |   52 +++++++++++++++++++++-----------------
 1 file changed, 29 insertions(+), 23 deletions(-)

diff -puN fs/namespace.c~make-cpu_writer-lock-a-mutex fs/namespace.c
--- linux-2.6.git/fs/namespace.c~make-cpu_writer-lock-a-mutex	2008-04-29 11:56:39.000000000 -0700
+++ linux-2.6.git-dave/fs/namespace.c	2008-04-29 11:56:39.000000000 -0700
@@ -173,7 +173,7 @@ struct mnt_writer {
 	 * If holding multiple instances of this lock, they
 	 * must be ordered by cpu number.
 	 */
-	spinlock_t lock;
+	struct mutex lock;
 	struct lock_class_key lock_class; /* compiles out with !lockdep */
 	unsigned long count;
 	struct vfsmount *mnt;
@@ -185,7 +185,7 @@ static int __init init_mnt_writers(void)
 	int cpu;
 	for_each_possible_cpu(cpu) {
 		struct mnt_writer *writer = &per_cpu(mnt_writers, cpu);
-		spin_lock_init(&writer->lock);
+		mutex_init(&writer->lock);
 		lockdep_set_class(&writer->lock, &writer->lock_class);
 		writer->count = 0;
 	}
@@ -200,7 +200,7 @@ static void unlock_mnt_writers(void)

 	for_each_possible_cpu(cpu) {
 		cpu_writer = &per_cpu(mnt_writers, cpu);
-		spin_unlock(&cpu_writer->lock);
+		mutex_unlock(&cpu_writer->lock);
 	}
 }

@@ -252,8 +252,8 @@ int mnt_want_write(struct vfsmount *mnt)
 	int ret = 0;
 	struct mnt_writer *cpu_writer;

-	cpu_writer = &get_cpu_var(mnt_writers);
-	spin_lock(&cpu_writer->lock);
+	cpu_writer = &__get_cpu_var(mnt_writers);
+	mutex_lock(&cpu_writer->lock);
 	if (__mnt_is_readonly(mnt)) {
 		ret = -EROFS;
 		goto out;
@@ -261,8 +261,7 @@ int mnt_want_write(struct vfsmount *mnt)
 	use_cpu_writer_for_mount(cpu_writer, mnt);
 	cpu_writer->count++;
 out:
-	spin_unlock(&cpu_writer->lock);
-	put_cpu_var(mnt_writers);
+	mutex_unlock(&cpu_writer->lock);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(mnt_want_write);
@@ -274,7 +273,7 @@ static void lock_mnt_writers(void)

 	for_each_possible_cpu(cpu) {
 		cpu_writer = &per_cpu(mnt_writers, cpu);
-		spin_lock(&cpu_writer->lock);
+		mutex_lock(&cpu_writer->lock);
 		__clear_mnt_count(cpu_writer);
 		cpu_writer->mnt = NULL;
 	}
@@ -333,18 +332,34 @@ void mnt_drop_write(struct vfsmount *mnt
 	int must_check_underflow = 0;
 	struct mnt_writer *cpu_writer;

-	cpu_writer = &get_cpu_var(mnt_writers);
-	spin_lock(&cpu_writer->lock);
+retry:
+	cpu_writer = &__get_cpu_var(mnt_writers);
+	mutex_lock(&cpu_writer->lock);

 	use_cpu_writer_for_mount(cpu_writer, mnt);
 	if (cpu_writer->count > 0) {
 		cpu_writer->count--;
 	} else {
-		must_check_underflow = 1;
+		/*
+		 * Without this check, it is theoretically
+		 * possible for large number of processes
+		 * to atomic_dec(__mnt_writers) and cause
+		 * it to underflow.  Because we are under
+		 * the mutex (and there are NR_CPUS
+		 * mutexes), this limits the number of
+		 * concurrent decrements and underflow
+		 * level to NR_CPUS.
+		 */
+		if (atomic_read(&mnt->__mnt_writers) <
+		    MNT_WRITER_UNDERFLOW_LIMIT) {
+			mutex_unlock(&cpu_writer->lock);
+			goto retry;
+		}
 		atomic_dec(&mnt->__mnt_writers);
+		must_check_underflow = 1;
 	}

-	spin_unlock(&cpu_writer->lock);
+	mutex_unlock(&cpu_writer->lock);
 	/*
 	 * Logically, we could call this each time,
 	 * but the __mnt_writers cacheline tends to
@@ -352,15 +367,6 @@ void mnt_drop_write(struct vfsmount *mnt
 	 */
 	if (must_check_underflow)
 		handle_write_count_underflow(mnt);
-	/*
-	 * This could be done right after the spinlock
-	 * is taken because the spinlock keeps us on
-	 * the cpu, and disables preemption.  However,
-	 * putting it here bounds the amount that
-	 * __mnt_writers can underflow.  Without it,
-	 * we could theoretically wrap __mnt_writers.
-	 */
-	put_cpu_var(mnt_writers);
 }
 EXPORT_SYMBOL_GPL(mnt_drop_write);

@@ -615,7 +621,7 @@ static inline void __mntput(struct vfsmo
 		struct mnt_writer *cpu_writer = &per_cpu(mnt_writers, cpu);
 		if (cpu_writer->mnt != mnt)
 			continue;
-		spin_lock(&cpu_writer->lock);
+		mutex_lock(&cpu_writer->lock);
 		atomic_add(cpu_writer->count, &mnt->__mnt_writers);
 		cpu_writer->count = 0;
 		/*
@@ -624,7 +630,7 @@ static inline void __mntput(struct vfsmo
 		 * it to be valid.
 		 */
 		cpu_writer->mnt = NULL;
-		spin_unlock(&cpu_writer->lock);
+		mutex_unlock(&cpu_writer->lock);
 	}
 	/*
 	 * This probably indicates that somebody messed