[PATCH 07/19] fs: Add freezing handling to mnt_want_write() / mnt_drop_write()

Jan Kara <jack@xxxxxxx> · Thu, 29 Mar 2012 01:43:22 +0200

Most of places where we want freeze protection coincides with the places where
we also have remount-ro protection. So make mnt_want_write() and
mnt_drop_write() (and their _file alternative) prevent freezing as well.
For the few cases that are really interested only in remount-ro protection
provide new function variants.

BugLink: https://bugs.launchpad.net/bugs/897421
Tested-by: Kamal Mostafa <kamal@xxxxxxxxxxxxx>
Tested-by: Peter M. Petrakis <peter.petrakis@xxxxxxxxxxxxx>
Tested-by: Dann Frazier <dann.frazier@xxxxxxxxxxxxx>
Tested-by: Massimo Morana <massimo.morana@xxxxxxxxxxxxx>
Signed-off-by: Jan Kara <jack@xxxxxxx>
---
 fs/file_table.c       |    2 +-
 fs/inode.c            |    4 +-
 fs/namei.c            |   16 +++++++-
 fs/namespace.c        |  101 +++++++++++++++++++++++++++++++++++++++----------
 fs/open.c             |    2 +-
 include/linux/mount.h |    4 ++
 6 files changed, 103 insertions(+), 26 deletions(-)

diff --git a/fs/file_table.c b/fs/file_table.c
index 20002e3..31ab06f 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -216,7 +216,7 @@ void drop_file_write_access(struct file *file)
 		return;
 	if (file_check_writeable(file) != 0)
 		return;
-	mnt_drop_write(mnt);
+	__mnt_drop_write(mnt);
 	file_release_write(file);
 }
 EXPORT_SYMBOL_GPL(drop_file_write_access);
diff --git a/fs/inode.c b/fs/inode.c
index d3ebdbe..095828c 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1580,7 +1580,7 @@ void file_update_time(struct file *file)
 		return;
 
 	/* Finally allowed to write? Takes lock. */
-	if (mnt_want_write_file(file))
+	if (__mnt_want_write_file(file))
 		return;
 
 	/* Only change inode inside the lock region */
@@ -1591,7 +1591,7 @@ void file_update_time(struct file *file)
 	if (sync_it & S_MTIME)
 		inode->i_mtime = now;
 	mark_inode_dirty_sync(inode);
-	mnt_drop_write_file(file);
+	__mnt_drop_write_file(file);
 }
 EXPORT_SYMBOL(file_update_time);
 
diff --git a/fs/namei.c b/fs/namei.c
index 575d3a4..474b6b2 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2166,12 +2166,21 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 	if (nd->last.name[nd->last.len])
 		goto exit;
 
+	/*
+	 * We don't know whether write access will be needed or not. But we
+	 * must get the protection before getting i_mutex due to locking
+	 * constraints. Thus O_CREAT open will get blocked on frozen filesystem
+	 * even if it didn't need to create anything. Not that surprising...
+	 */
+	sb_start_write(dir->d_sb);
+
 	mutex_lock(&dir->d_inode->i_mutex);
 
 	dentry = lookup_hash(nd);
 	error = PTR_ERR(dentry);
 	if (IS_ERR(dentry)) {
 		mutex_unlock(&dir->d_inode->i_mutex);
+		sb_end_write(dir->d_sb);
 		goto exit;
 	}
 
@@ -2190,9 +2199,11 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 		 * a permanent write count is taken through
 		 * the 'struct file' in nameidata_to_filp().
 		 */
-		error = mnt_want_write(nd->path.mnt);
-		if (error)
+		error = __mnt_want_write(nd->path.mnt);
+		if (error) {
+			sb_end_write(dir->d_sb);
 			goto exit_mutex_unlock;
+		}
 		want_write = 1;
 		/* Don't check for write permission, don't truncate */
 		open_flag &= ~O_TRUNC;
@@ -2214,6 +2225,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 	 * It already exists.
 	 */
 	mutex_unlock(&dir->d_inode->i_mutex);
+	sb_end_write(dir->d_sb);
 	audit_inode(pathname, path->dentry);
 
 	error = -EEXIST;
diff --git a/fs/namespace.c b/fs/namespace.c
index e608199..157afbe 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -283,24 +283,22 @@ static int mnt_is_readonly(struct vfsmount *mnt)
 }
 
 /*
- * Most r/o checks on a fs are for operations that take
- * discrete amounts of time, like a write() or unlink().
- * We must keep track of when those operations start
- * (for permission checks) and when they end, so that
- * we can determine when writes are able to occur to
- * a filesystem.
+ * Most r/o & frozen checks on a fs are for operations that take discrete
+ * amounts of time, like a write() or unlink().  We must keep track of when
+ * those operations start (for permission checks) and when they end, so that we
+ * can determine when writes are able to occur to a filesystem.
  */
 /**
- * mnt_want_write - get write access to a mount
+ * __mnt_want_write - get write access to a mount without freeze protection
  * @m: the mount on which to take a write
  *
- * This tells the low-level filesystem that a write is
- * about to be performed to it, and makes sure that
- * writes are allowed before returning success.  When
- * the write operation is finished, mnt_drop_write()
- * must be called.  This is effectively a refcount.
+ * This tells the low-level filesystem that a write is about to be performed to
+ * it, and makes sure that writes are allowed (mnt it read-write) before
+ * returning success. This operation does not protect against filesystem being
+ * frozen. When the write operation is finished, __mnt_drop_write() must be
+ * called. This is effectively a refcount.
  */
-int mnt_want_write(struct vfsmount *m)
+int __mnt_want_write(struct vfsmount *m)
 {
 	struct mount *mnt = real_mount(m);
 	int ret = 0;
@@ -326,6 +324,28 @@ int mnt_want_write(struct vfsmount *m)
 		ret = -EROFS;
 	}
 	preempt_enable();
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(__mnt_want_write);
+
+/**
+ * mnt_want_write - get write access to a mount
+ * @m: the mount on which to take a write
+ *
+ * This tells the low-level filesystem that a write is about to be performed to
+ * it, and makes sure that writes are allowed (mount is read-write, filesystem
+ * is not frozen) before returning success.  When the write operation is
+ * finished, mnt_drop_write() must be called.  This is effectively a refcount.
+ */
+int mnt_want_write(struct vfsmount *m)
+{
+	int ret;
+
+	sb_start_write(m->mnt_sb);
+	ret = __mnt_want_write(m);
+	if (ret)
+		sb_end_write(m->mnt_sb);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(mnt_want_write);
@@ -355,38 +375,79 @@ int mnt_clone_write(struct vfsmount *mnt)
 EXPORT_SYMBOL_GPL(mnt_clone_write);
 
 /**
- * mnt_want_write_file - get write access to a file's mount
+ * __mnt_want_write_file - get write access to a file's mount
  * @file: the file who's mount on which to take a write
  *
- * This is like mnt_want_write, but it takes a file and can
+ * This is like __mnt_want_write, but it takes a file and can
  * do some optimisations if the file is open for write already
  */
-int mnt_want_write_file(struct file *file)
+int __mnt_want_write_file(struct file *file)
 {
 	struct inode *inode = file->f_dentry->d_inode;
+
 	if (!(file->f_mode & FMODE_WRITE) || special_file(inode->i_mode))
-		return mnt_want_write(file->f_path.mnt);
+		return __mnt_want_write(file->f_path.mnt);
 	else
 		return mnt_clone_write(file->f_path.mnt);
 }
+EXPORT_SYMBOL_GPL(__mnt_want_write_file);
+
+/**
+ * mnt_want_write_file - get write access to a file's mount
+ * @file: the file who's mount on which to take a write
+ *
+ * This is like mnt_want_write, but it takes a file and can
+ * do some optimisations if the file is open for write already
+ */
+int mnt_want_write_file(struct file *file)
+{
+	int ret;
+
+	sb_start_write(file->f_path.mnt->mnt_sb);
+	ret = __mnt_want_write_file(file);
+	if (ret)
+		sb_end_write(file->f_path.mnt->mnt_sb);
+	return ret;
+}
 EXPORT_SYMBOL_GPL(mnt_want_write_file);
 
 /**
- * mnt_drop_write - give up write access to a mount
+ * __mnt_drop_write - give up write access to a mount
  * @mnt: the mount on which to give up write access
  *
  * Tells the low-level filesystem that we are done
  * performing writes to it.  Must be matched with
- * mnt_want_write() call above.
+ * __mnt_want_write() call above.
  */
-void mnt_drop_write(struct vfsmount *mnt)
+void __mnt_drop_write(struct vfsmount *mnt)
 {
 	preempt_disable();
 	mnt_dec_writers(real_mount(mnt));
 	preempt_enable();
 }
+EXPORT_SYMBOL_GPL(__mnt_drop_write);
+
+/**
+ * mnt_drop_write - give up write access to a mount
+ * @mnt: the mount on which to give up write access
+ *
+ * Tells the low-level filesystem that we are done performing writes to it and
+ * also allows filesystem to be frozen again.  Must be matched with
+ * mnt_want_write() call above.
+ */
+void mnt_drop_write(struct vfsmount *mnt)
+{
+	__mnt_drop_write(mnt);
+	sb_end_write(mnt->mnt_sb);
+}
 EXPORT_SYMBOL_GPL(mnt_drop_write);
 
+void __mnt_drop_write_file(struct file *file)
+{
+	__mnt_drop_write(file->f_path.mnt);
+}
+EXPORT_SYMBOL(__mnt_drop_write_file);
+
 void mnt_drop_write_file(struct file *file)
 {
 	mnt_drop_write(file->f_path.mnt);
diff --git a/fs/open.c b/fs/open.c
index 77becc0..456e415 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -637,7 +637,7 @@ static inline int __get_file_write_access(struct inode *inode,
 		/*
 		 * Balanced in __fput()
 		 */
-		error = mnt_want_write(mnt);
+		error = __mnt_want_write(mnt);
 		if (error)
 			put_write_access(inode);
 	}
diff --git a/include/linux/mount.h b/include/linux/mount.h
index d7029f4..d6ffc3d 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -55,10 +55,14 @@ struct vfsmount {
 
 struct file; /* forward dec */
 
+extern int __mnt_want_write(struct vfsmount *mnt);
 extern int mnt_want_write(struct vfsmount *mnt);
+extern int __mnt_want_write_file(struct file *file);
 extern int mnt_want_write_file(struct file *file);
 extern int mnt_clone_write(struct vfsmount *mnt);
+extern void __mnt_drop_write(struct vfsmount *mnt);
 extern void mnt_drop_write(struct vfsmount *mnt);
+extern void __mnt_drop_write_file(struct file *file);
 extern void mnt_drop_write_file(struct file *file);
 extern void mntput(struct vfsmount *mnt);
 extern struct vfsmount *mntget(struct vfsmount *mnt);
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html