[PATCH v3] ovl: Improving syncfs efficiency

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Currently syncfs(2) call on overlayfs just simply call sync_filesystem()
on upper_sb to synchronize whole dirty inodes in upper filesystem
regardless of the overlay ownership of the inode. It has obvious
shortcomings as below.

(1) Performance
Synchronization is probably heavy in most cases, especially when upper
filesystem is not dedicated to target overlayfs.

(2) Interference
Unplanned synchronization will probably impact IO performance of the
processes which use other overlayfs on same upper filesystem or directly
use upper filesystem.

This patch iterates overlay inodes to only sync target dirty inodes in
upper filesystem. By doing this, It is able to reduce cost of synchronization
and will not seriously impact IO performance of irrelative processes.

Signed-off-by: Chengguang Xu <cgxu519@xxxxxxxxxx>
---

Changes since v2:
- Decoupling with syncfs of upper fs by taking sb->s_sync_lock of
overlayfs.
- Factoring out sync operation to different helpers for easy understanding.

Changes since v1:
- If upper filesystem is readonly mode then skip synchronization.
- Introduce global wait list to replace temporary wait list for
concurrent synchronization.

 fs/overlayfs/ovl_entry.h |   5 ++
 fs/overlayfs/super.c     | 120 +++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 122 insertions(+), 3 deletions(-)

diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 9d0bc03..ff935da 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -52,6 +52,9 @@ struct ovl_fs {
 	/* Did we take the inuse lock? */
 	bool upperdir_locked;
 	bool workdir_locked;
+	/* ovl inode sync list and lock */
+	spinlock_t  ovl_sync_list_lock;
+	struct list_head ovl_sync_list;
 };
 
 /* private information held for every overlayfs dentry */
@@ -80,6 +83,8 @@ struct ovl_inode {
 
 	/* synchronize copy up and more */
 	struct mutex lock;
+	/* ovl inode sync list */
+	struct list_head sync_list;
 };
 
 static inline struct ovl_inode *OVL_I(struct inode *inode)
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 76440fe..947bb83 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -17,6 +17,8 @@
 #include <linux/statfs.h>
 #include <linux/seq_file.h>
 #include <linux/posix_acl_xattr.h>
+#include <linux/writeback.h>
+#include <linux/blkdev.h>
 #include "overlayfs.h"
 
 MODULE_AUTHOR("Miklos Szeredi <miklos@xxxxxxxxxx>");
@@ -195,6 +197,7 @@ static struct inode *ovl_alloc_inode(struct super_block *sb)
 	oi->__upperdentry = NULL;
 	oi->lower = NULL;
 	mutex_init(&oi->lock);
+	INIT_LIST_HEAD(&oi->sync_list);
 
 	return &oi->vfs_inode;
 }
@@ -252,6 +255,112 @@ static void ovl_put_super(struct super_block *sb)
 	ovl_free_fs(ofs);
 }
 
+static void ovl_sync_inodes(struct super_block *sb)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+	struct inode *inode, *i_next;
+	struct inode *upper_inode;
+	struct blk_plug plug;
+
+	struct writeback_control wbc = {
+		.sync_mode		= WB_SYNC_ALL,
+		.for_sync		= 1,
+		.range_start		= 0,
+		.range_end		= LLONG_MAX,
+		.nr_to_write		= LONG_MAX,
+	};
+
+	blk_start_plug(&plug);
+	spin_lock(&sb->s_inode_list_lock);
+	list_for_each_entry_safe(inode, i_next, &sb->s_inodes, i_sb_list) {
+		upper_inode = ovl_inode_upper(inode);
+		if (!upper_inode)
+			continue;
+
+		spin_lock(&upper_inode->i_lock);
+		if (upper_inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE) ||
+			list_empty(&upper_inode->i_io_list)) {
+			spin_unlock(&upper_inode->i_lock);
+			continue;
+		}
+		spin_unlock(&upper_inode->i_lock);
+
+		if (!igrab(inode))
+			continue;
+
+		if (!igrab(upper_inode)) {
+			iput(inode);
+			continue;
+		}
+		spin_unlock(&sb->s_inode_list_lock);
+
+		sync_inode(upper_inode, &wbc);
+		spin_lock(&ofs->ovl_sync_list_lock);
+		if (list_empty(&OVL_I(inode)->sync_list))
+			list_add(&OVL_I(inode)->sync_list, &ofs->ovl_sync_list);
+		else {
+			iput(upper_inode);
+			iput(inode);
+		}
+		spin_unlock(&ofs->ovl_sync_list_lock);
+
+		if (need_resched()) {
+			blk_finish_plug(&plug);
+			cond_resched();
+			blk_start_plug(&plug);
+		}
+		spin_lock(&sb->s_inode_list_lock);
+	}
+	spin_unlock(&sb->s_inode_list_lock);
+	blk_finish_plug(&plug);
+}
+
+static void ovl_wait_inodes(struct super_block *sb)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+	struct ovl_inode *oi, *oi_next;
+	struct inode *upper_inode;
+
+	mutex_lock(&sb->s_sync_lock);
+	spin_lock(&ofs->ovl_sync_list_lock);
+	list_for_each_entry_safe(oi, oi_next, &ofs->ovl_sync_list, sync_list) {
+		upper_inode = ovl_inode_upper(&oi->vfs_inode);
+		list_del_init(&oi->sync_list);
+		spin_unlock(&ofs->ovl_sync_list_lock);
+
+		filemap_fdatawait_keep_errors(upper_inode->i_mapping);
+		iput(upper_inode);
+		iput(&oi->vfs_inode);
+
+		if (need_resched())
+			cond_resched();
+
+		spin_lock(&ofs->ovl_sync_list_lock);
+	}
+	spin_unlock(&ofs->ovl_sync_list_lock);
+	mutex_unlock(&sb->s_sync_lock);
+}
+
+/**
+ * ovl_sync_filesystem
+ * @sb: The overlayfs super block
+ *
+ * Sync underlying dirty inodes in upper filesystem and wait for completion.
+ */
+static int ovl_sync_filesystem(struct super_block *sb)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+	struct super_block *upper_sb = ofs->upper_mnt->mnt_sb;
+
+	ovl_sync_inodes(sb);
+	ovl_wait_inodes(sb);
+
+	if (upper_sb->s_op->sync_fs)
+		upper_sb->s_op->sync_fs(upper_sb, 1);
+
+	return sync_blockdev(upper_sb->s_bdev);
+}
+
 /* Sync real dirty inodes in upper filesystem (if it exists) */
 static int ovl_sync_fs(struct super_block *sb, int wait)
 {
@@ -266,17 +375,19 @@ static int ovl_sync_fs(struct super_block *sb, int wait)
 	 * If this is a sync(2) call or an emergency sync, all the super blocks
 	 * will be iterated, including upper_sb, so no need to do anything.
 	 *
-	 * If this is a syncfs(2) call, then we do need to call
-	 * sync_filesystem() on upper_sb, but enough if we do it when being
+	 * If this is a syncfs(2) call, then we do need to call sync_inode()
+	 * on underlying dirty upper_inode, but enough if we do it when being
 	 * called with wait == 1.
 	 */
 	if (!wait)
 		return 0;
 
 	upper_sb = ofs->upper_mnt->mnt_sb;
+	if (sb_rdonly(upper_sb))
+		return 0;
 
 	down_read(&upper_sb->s_umount);
-	ret = sync_filesystem(upper_sb);
+	ret = ovl_sync_filesystem(sb);
 	up_read(&upper_sb->s_umount);
 
 	return ret;
@@ -1206,6 +1317,9 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	if (!cred)
 		goto out_err;
 
+	spin_lock_init(&ofs->ovl_sync_list_lock);
+	INIT_LIST_HEAD(&ofs->ovl_sync_list);
+
 	ofs->config.index = ovl_index_def;
 	err = ovl_parse_opt((char *) data, &ofs->config);
 	if (err)
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-unionfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux Filesystems Devel]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux