[PATCH 7/9 updated] vfs: protect remounting superblock read-only

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Miklos Szeredi <mszeredi@xxxxxxx>

Currently remouting superblock read-only is racy in a major way.

With the per mount read-only infrastructure it is now possible to
prevent most races, which this patch attempts.

Before starting the remount read-only, set MNT_READONLY on all mounts
so that writes are prevented during the remount.  Also set
MNT_WAS_WRITE on mounts that were not read-only.

If the remounting is unsuccessful, restore the mounts to read-write.
This can result in transient EROFS errors.  Unfortunately hodling off
writes is difficult as remount itself may touch the filesystem
(e.g. through load_nls()) which would then deadlock.

Careful thought needs to be given to places where mnt_flags are set
such as do_add_mount() and clone_mnt() to ensure that a read-write
mount is not added to a read-only superblock.

Small races still remain because delayed writes due to nlink going to
zero but inode still having a reference are not dealt with by this
patch.

Signed-off-by: Miklos Szeredi <mszeredi@xxxxxxx>
---
 fs/internal.h         |    3 ++
 fs/namespace.c        |   73 ++++++++++++++++++++++++++++++++++++++++++++++++--
 fs/super.c            |   25 ++++++++++++++---
 include/linux/mount.h |    1 
 4 files changed, 96 insertions(+), 6 deletions(-)

Index: linux-2.6/fs/internal.h
===================================================================
--- linux-2.6.orig/fs/internal.h	2010-10-21 19:38:36.000000000 +0200
+++ linux-2.6/fs/internal.h	2010-10-22 17:48:16.000000000 +0200
@@ -69,6 +69,9 @@ extern void mnt_set_mountpoint(struct vf
 extern void release_mounts(struct list_head *);
 extern void umount_tree(struct vfsmount *, int, struct list_head *);
 extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int);
+extern int sb_prepare_remount_readonly(struct super_block *);
+extern void sb_cancel_remount_readonly(struct super_block *);
+extern void sb_finish_remount_readonly(struct super_block *);
 
 extern void __init mnt_init(void);
 
Index: linux-2.6/fs/namespace.c
===================================================================
--- linux-2.6.orig/fs/namespace.c	2010-10-22 16:02:14.000000000 +0200
+++ linux-2.6/fs/namespace.c	2010-10-22 17:59:30.000000000 +0200
@@ -419,6 +419,62 @@ void sb_force_remount_readonly(struct su
 }
 EXPORT_SYMBOL(sb_force_remount_readonly);
 
+int sb_prepare_remount_readonly(struct super_block *sb)
+{
+	struct vfsmount *mnt;
+	int err = 0;
+
+	br_write_lock(vfsmount_lock);
+	list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
+		if (!(mnt->mnt_flags & MNT_READONLY)) {
+			mnt->mnt_flags |= MNT_WRITE_HOLD;
+			smp_mb();
+			if (count_mnt_writers(mnt) > 0) {
+				err = -EBUSY;
+				break;
+			}
+		}
+	}
+	list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
+		if (mnt->mnt_flags & MNT_WRITE_HOLD) {
+			if (!err) {
+				mnt->mnt_flags |= MNT_READONLY | MNT_WAS_WRITE;
+				smp_wmb();
+			}
+			mnt->mnt_flags &= ~MNT_WRITE_HOLD;
+		}
+	}
+	br_write_unlock(vfsmount_lock);
+
+	return err;
+}
+
+void sb_cancel_remount_readonly(struct super_block *sb)
+{
+	struct vfsmount *mnt;
+
+	br_write_lock(vfsmount_lock);
+	list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
+		if (mnt->mnt_flags & MNT_WAS_WRITE)
+			mnt->mnt_flags &= ~(MNT_WAS_WRITE | MNT_READONLY);
+	}
+
+	br_write_unlock(vfsmount_lock);
+}
+
+void sb_finish_remount_readonly(struct super_block *sb)
+{
+	struct vfsmount *mnt;
+
+	br_write_lock(vfsmount_lock);
+	list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
+		if (mnt->mnt_flags & MNT_WAS_WRITE)
+			mnt->mnt_flags &= ~MNT_WAS_WRITE;
+	}
+	sb->s_flags |= MS_RDONLY;
+	br_write_unlock(vfsmount_lock);
+}
+
 void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb)
 {
 	mnt->mnt_sb = sb;
@@ -608,7 +664,7 @@ static struct vfsmount *clone_mnt(struct
 				goto out_free;
 		}
 
-		mnt->mnt_flags = old->mnt_flags & ~MNT_WRITE_HOLD;
+		mnt->mnt_flags = old->mnt_flags & MNT_PROPAGATION_MASK;
 		atomic_inc(&sb->s_active);
 		mnt->mnt_sb = sb;
 		mnt->mnt_root = dget(root);
@@ -636,7 +692,13 @@ static struct vfsmount *clone_mnt(struct
 				list_add(&mnt->mnt_expire, &old->mnt_expire);
 		}
 
+		/*
+		 * Add new mount to s_mounts.  Do this after fiddling
+		 * with propagation flags to prevent races with
+		 * remount ro/rw.
+		 */
 		br_write_lock(vfsmount_lock);
+		mnt->mnt_flags |= old->mnt_flags & ~MNT_PROPAGATION_MASK;
 		list_add_tail(&mnt->mnt_instance, &mnt->mnt_sb->s_mounts);
 		br_write_unlock(vfsmount_lock);
 	}
@@ -1782,6 +1844,14 @@ int do_add_mount(struct vfsmount *newmnt
 
 	mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL);
 
+	/* Locking is necessary to prevent racing with remount r/o */
+	down_read(&newmnt->mnt_sb->s_umount);
+	if (newmnt->mnt_sb->s_flags & MS_RDONLY)
+		mnt_flags |= MNT_READONLY;
+
+	newmnt->mnt_flags = mnt_flags;
+	up_read(&newmnt->mnt_sb->s_umount);
+
 	down_write(&namespace_sem);
 	/* Something was mounted here while we slept */
 	while (d_mountpoint(path->dentry) &&
@@ -1801,7 +1871,6 @@ int do_add_mount(struct vfsmount *newmnt
 	if (S_ISLNK(newmnt->mnt_root->d_inode->i_mode))
 		goto unlock;
 
-	newmnt->mnt_flags = mnt_flags;
 	if ((err = graft_tree(newmnt, path)))
 		goto unlock;
 
Index: linux-2.6/fs/super.c
===================================================================
--- linux-2.6.orig/fs/super.c	2010-10-22 16:02:14.000000000 +0200
+++ linux-2.6/fs/super.c	2010-10-22 17:48:16.000000000 +0200
@@ -573,18 +573,31 @@ int do_remount_sb(struct super_block *sb
 	/* If we are remounting RDONLY and current sb is read/write,
 	   make sure there are no rw files opened */
 	if (remount_ro) {
-		if (force)
+		if (force) {
 			mark_files_ro(sb);
-		else if (!fs_may_remount_ro(sb))
-			return -EBUSY;
+		} else {
+			retval = sb_prepare_remount_readonly(sb);
+			if (retval)
+				return retval;
+
+			retval = -EBUSY;
+			if (!fs_may_remount_ro(sb))
+				goto cancel_readonly;
+		}
 	}
 
 	if (sb->s_op->remount_fs) {
 		retval = sb->s_op->remount_fs(sb, &flags, data);
 		/* If forced remount, go ahead despite any errors */
-		if (retval && !force)
+		if (retval && !force) {
+			if (remount_ro)
+				goto cancel_readonly;
 			return retval;
+		}
 	}
+	if (remount_ro && !force)
+		sb_finish_remount_readonly(sb);
+
 	sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
 
 	/*
@@ -598,6 +611,10 @@ int do_remount_sb(struct super_block *sb
 	if (remount_ro && sb->s_bdev)
 		invalidate_bdev(sb->s_bdev);
 	return 0;
+
+cancel_readonly:
+	sb_cancel_remount_readonly(sb);
+	return retval;
 }
 
 static void do_emergency_remount(struct work_struct *work)
Index: linux-2.6/include/linux/mount.h
===================================================================
--- linux-2.6.orig/include/linux/mount.h	2010-10-22 16:02:14.000000000 +0200
+++ linux-2.6/include/linux/mount.h	2010-10-22 17:59:30.000000000 +0200
@@ -30,6 +30,7 @@ struct mnt_namespace;
 
 #define MNT_SHRINKABLE	0x100
 #define MNT_WRITE_HOLD	0x200
+#define MNT_WAS_WRITE	0x400   /* used by remount to remember write mounts */
 
 #define MNT_SHARED	0x1000	/* if the vfsmount is a shared mount */
 #define MNT_UNBINDABLE	0x2000	/* if the vfsmount is a unbindable mount */
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux