[CC: Jan Kara]

On Thu, Jan 11, 2018 at 3:48 AM, Chengguang Xu <cgxu519@xxxxxxxxxx> wrote:
> Currently a syncfs(2) call on overlayfs simply calls sync_filesystem()
> on upper_sb to synchronize all dirty inodes in the upper filesystem,
> regardless of the overlay ownership of the inode. This has the obvious
> shortcomings below.
>
> (1) Performance
> Synchronization is probably heavy in most cases, especially when the
> upper filesystem is not dedicated to the target overlayfs.
>
> (2) Interference
> Unplanned synchronization will probably impact the IO performance of
> processes that use other overlayfs instances on the same upper
> filesystem, or that use the upper filesystem directly.
>
> This patch iterates the overlay inodes and syncs only the target dirty
> inodes in the upper filesystem. This reduces the cost of
> synchronization and avoids seriously impacting the IO performance of
> unrelated processes.
>
> Signed-off-by: Chengguang Xu <cgxu519@xxxxxxxxxx>

Looks good.

Reviewed-by: Amir Goldstein <amir73il@xxxxxxxxx>

Jan,

Would you mind taking a quick look at this?

Overlayfs inodes have no dirty mapping, only the underlying upper
inodes do, so sync_inodes_sb() is not enough for overlay syncfs().
This code creates the list of upper inodes to sync, syncs them and
then calls the underlying upper fs sync_fs().

Does this look reasonable?

Thanks, Amir.
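For context, the user-visible entry point this patch optimizes is syncfs(2)
called on a file descriptor that refers to the overlay mount. Below is a
minimal userspace sketch of that call; the /mnt/overlay mount point is only
an illustrative assumption, not something taken from the patch:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        /* Any fd on the overlay mount is enough for syncfs(2). */
        int fd = open("/mnt/overlay", O_RDONLY | O_DIRECTORY);

        if (fd < 0) {
                perror("open");
                return 1;
        }

        /*
         * syncfs() reaches ovl_sync_fs() (with wait == 0, then wait == 1).
         * Before the patch this writes back every dirty inode on the upper
         * filesystem; with the patch only the upper inodes belonging to
         * this overlay instance are written back and waited on.
         */
        if (syncfs(fd) < 0) {
                perror("syncfs");
                close(fd);
                return 1;
        }

        close(fd);
        return 0;
}

With several overlay instances sharing one upper filesystem, the old
sync_filesystem(upper_sb) path made this call pay for all of their dirty
inodes; the per-inode walk in the patch limits the work to the calling
overlay.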
> ---
>
> Changes since v2:
> - Decoupling with syncfs of upper fs by taking sb->s_sync_lock of
>   overlayfs.
> - Factoring out sync operation to different helpers for easy understanding.
>
> Changes since v1:
> - If upper filesystem is readonly mode then skip synchronization.
> - Introduce global wait list to replace temporary wait list for
>   concurrent synchronization.
>
>  fs/overlayfs/ovl_entry.h |   5 ++
>  fs/overlayfs/super.c     | 120 +++++++++++++++++++++++++++++++++++++++++++++--
>  2 files changed, 122 insertions(+), 3 deletions(-)
>
> diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
> index 9d0bc03..ff935da 100644
> --- a/fs/overlayfs/ovl_entry.h
> +++ b/fs/overlayfs/ovl_entry.h
> @@ -52,6 +52,9 @@ struct ovl_fs {
>         /* Did we take the inuse lock? */
>         bool upperdir_locked;
>         bool workdir_locked;
> +       /* ovl inode sync list and lock */
> +       spinlock_t ovl_sync_list_lock;
> +       struct list_head ovl_sync_list;
>  };
>
>  /* private information held for every overlayfs dentry */
> @@ -80,6 +83,8 @@ struct ovl_inode {
>
>         /* synchronize copy up and more */
>         struct mutex lock;
> +       /* ovl inode sync list */
> +       struct list_head sync_list;
>  };
>
>  static inline struct ovl_inode *OVL_I(struct inode *inode)
> diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
> index 76440fe..947bb83 100644
> --- a/fs/overlayfs/super.c
> +++ b/fs/overlayfs/super.c
> @@ -17,6 +17,8 @@
>  #include <linux/statfs.h>
>  #include <linux/seq_file.h>
>  #include <linux/posix_acl_xattr.h>
> +#include <linux/writeback.h>
> +#include <linux/blkdev.h>
>  #include "overlayfs.h"
>
>  MODULE_AUTHOR("Miklos Szeredi <miklos@xxxxxxxxxx>");
> @@ -195,6 +197,7 @@ static struct inode *ovl_alloc_inode(struct super_block *sb)
>         oi->__upperdentry = NULL;
>         oi->lower = NULL;
>         mutex_init(&oi->lock);
> +       INIT_LIST_HEAD(&oi->sync_list);
>
>         return &oi->vfs_inode;
>  }
> @@ -252,6 +255,112 @@ static void ovl_put_super(struct super_block *sb)
>         ovl_free_fs(ofs);
>  }
>
> +static void ovl_sync_inodes(struct super_block *sb)
> +{
> +       struct ovl_fs *ofs = sb->s_fs_info;
> +       struct inode *inode, *i_next;
> +       struct inode *upper_inode;
> +       struct blk_plug plug;
> +
> +       struct writeback_control wbc = {
> +               .sync_mode      = WB_SYNC_ALL,
> +               .for_sync       = 1,
> +               .range_start    = 0,
> +               .range_end      = LLONG_MAX,
> +               .nr_to_write    = LONG_MAX,
> +       };
> +
> +       blk_start_plug(&plug);
> +       spin_lock(&sb->s_inode_list_lock);
> +       list_for_each_entry_safe(inode, i_next, &sb->s_inodes, i_sb_list) {
> +               upper_inode = ovl_inode_upper(inode);
> +               if (!upper_inode)
> +                       continue;
> +
> +               spin_lock(&upper_inode->i_lock);
> +               if (upper_inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE) ||
> +                       list_empty(&upper_inode->i_io_list)) {
> +                       spin_unlock(&upper_inode->i_lock);
> +                       continue;
> +               }
> +               spin_unlock(&upper_inode->i_lock);
> +
> +               if (!igrab(inode))
> +                       continue;
> +
> +               if (!igrab(upper_inode)) {
> +                       iput(inode);
> +                       continue;
> +               }
> +               spin_unlock(&sb->s_inode_list_lock);
> +
> +               sync_inode(upper_inode, &wbc);
> +               spin_lock(&ofs->ovl_sync_list_lock);
> +               if (list_empty(&OVL_I(inode)->sync_list))
> +                       list_add(&OVL_I(inode)->sync_list, &ofs->ovl_sync_list);
> +               else {
> +                       iput(upper_inode);
> +                       iput(inode);
> +               }
> +               spin_unlock(&ofs->ovl_sync_list_lock);
> +
> +               if (need_resched()) {
> +                       blk_finish_plug(&plug);
> +                       cond_resched();
> +                       blk_start_plug(&plug);
> +               }
> +               spin_lock(&sb->s_inode_list_lock);
> +       }
> +       spin_unlock(&sb->s_inode_list_lock);
> +       blk_finish_plug(&plug);
> +}
> +
> +static void ovl_wait_inodes(struct super_block *sb)
> +{
> +       struct ovl_fs *ofs = sb->s_fs_info;
> +       struct ovl_inode *oi, *oi_next;
> +       struct inode *upper_inode;
> +
> +       mutex_lock(&sb->s_sync_lock);
> +       spin_lock(&ofs->ovl_sync_list_lock);
> +       list_for_each_entry_safe(oi, oi_next, &ofs->ovl_sync_list, sync_list) {
> +               upper_inode = ovl_inode_upper(&oi->vfs_inode);
> +               list_del_init(&oi->sync_list);
> +               spin_unlock(&ofs->ovl_sync_list_lock);
> +
> +               filemap_fdatawait_keep_errors(upper_inode->i_mapping);
> +               iput(upper_inode);
> +               iput(&oi->vfs_inode);
> +
> +               if (need_resched())
> +                       cond_resched();
> +
> +               spin_lock(&ofs->ovl_sync_list_lock);
> +       }
> +       spin_unlock(&ofs->ovl_sync_list_lock);
> +       mutex_unlock(&sb->s_sync_lock);
> +}
> +
> +/**
> + * ovl_sync_filesystem
> + * @sb: The overlayfs super block
> + *
> + * Sync underlying dirty inodes in upper filesystem and wait for completion.
> + */
> +static int ovl_sync_filesystem(struct super_block *sb)
> +{
> +       struct ovl_fs *ofs = sb->s_fs_info;
> +       struct super_block *upper_sb = ofs->upper_mnt->mnt_sb;
> +
> +       ovl_sync_inodes(sb);
> +       ovl_wait_inodes(sb);
> +
> +       if (upper_sb->s_op->sync_fs)
> +               upper_sb->s_op->sync_fs(upper_sb, 1);
> +
> +       return sync_blockdev(upper_sb->s_bdev);
> +}
> +
>  /* Sync real dirty inodes in upper filesystem (if it exists) */
>  static int ovl_sync_fs(struct super_block *sb, int wait)
>  {
> @@ -266,17 +375,19 @@ static int ovl_sync_fs(struct super_block *sb, int wait)
>          * If this is a sync(2) call or an emergency sync, all the super blocks
>          * will be iterated, including upper_sb, so no need to do anything.
>          *
> -        * If this is a syncfs(2) call, then we do need to call
> -        * sync_filesystem() on upper_sb, but enough if we do it when being
> +        * If this is a syncfs(2) call, then we do need to call sync_inode()
> +        * on underlying dirty upper_inode, but enough if we do it when being
>          * called with wait == 1.
>          */
>         if (!wait)
>                 return 0;
>
>         upper_sb = ofs->upper_mnt->mnt_sb;
> +       if (sb_rdonly(upper_sb))
> +               return 0;
>
>         down_read(&upper_sb->s_umount);
> -       ret = sync_filesystem(upper_sb);
> +       ret = ovl_sync_filesystem(sb);
>         up_read(&upper_sb->s_umount);
>
>         return ret;
> @@ -1206,6 +1317,9 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
>         if (!cred)
>                 goto out_err;
>
> +       spin_lock_init(&ofs->ovl_sync_list_lock);
> +       INIT_LIST_HEAD(&ofs->ovl_sync_list);
> +
>         ofs->config.index = ovl_index_def;
>         err = ovl_parse_opt((char *) data, &ofs->config);
>         if (err)
> --
> 1.8.3.1
>
--
To unsubscribe from this list: send the line "unsubscribe linux-unionfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html