Re: [RFC][PATCH 4/4] ovl: allow concurrent copy up data of different files

Vivek Goyal <vgoyal@xxxxxxxxxx> · Tue, 15 Nov 2016 10:56:43 -0500

On Sat, Nov 12, 2016 at 09:36:04PM +0200, Amir Goldstein wrote:
> Currently, all copy operations are serialized by taking the
> lock_rename(workdir, upperdir) locks for the entire copy up
> operation, including the data copy up.
> 
> Copying up data may take a long time with large files and
> holding s_vfs_rename_mutex during that time blocks all
> rename and copy up operations on the entire underlying fs.
> 
> This change addresses this problem by changing the copy up
> locking scheme for different types of files as follows.
> 
> Directories:
>   <maybe> inode_lock(ovl_inode)

Hi Amir,

What does <maybe> mean here? Is it optional?

>     lock_rename(workdir, upperdir)
>       copy up dir to workdir
>       move dir to upperdir
> 
> Special and zero size files:
>   inode_lock(ovl_inode)
>     lock_rename(workdir, upperdir)
>       copy up file to workdir (no data)
>       move file to upperdir
> 

If we are copying up directories and special and zero file under
lock_rename() and don't drop it during the whole operation, then
why do we need to take ovl_inode lock.

IOW, current code seems to be just doing lock_rename(workdir, upperdir)
for the copy up operation. But now this new code also requires
inode_lock(ovl_inode) and I am trying to understand why.

Thanks
Vivek

> Regular files with data:
>   inode_lock(ovl_inode)
>     lock_rename(workdir, upperdir)
>       copy up file to workdir (no data)
>     unlock_rename(workdir, upperdir)
>       copy data to file in workdir
>     lock_rename(workdir, upperdir)
>       move file to upperdir
> 
> Signed-off-by: Amir Goldstein <amir73il@xxxxxxxxx>
> ---
>  fs/overlayfs/copy_up.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++---
>  fs/overlayfs/inode.c   |  3 +++
>  2 files changed, 69 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
> index a16127b..1b9705e 100644
> --- a/fs/overlayfs/copy_up.c
> +++ b/fs/overlayfs/copy_up.c
> @@ -230,6 +230,44 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
>  	return err;
>  }
>  
> +/*
> + * Called with dentry->d_inode lock held only for the last (leaf) copy up
> + * from __ovl_copy_up(), so it is NOT held when called for ancestor
> + * directory from __ovl_copy_up()
> + *
> + * Called with lock_rename(workdir, upperdir) locks held.
> + *
> + * lock_rename() locks remain locked throughout the copy up
> + * of non regular files and zero sized regular files.
> + *
> + * lock_rename() locks are released during ovl_copy_up_data()
> + * of non zero sized regular files. During this time, the overlay
> + * dentry->d_inode lock is still held to avoid concurrent
> + * copy up of files with data.
> + *
> + * Maybe a better description of this locking scheme:
> + *
> + * Directories:
> + *   <maybe> inode_lock(ovl_inode)
> + *     lock_rename(workdir, upperdir)
> + *       copy up dir to workdir
> + *       move dir to upperdir
> + *
> + * Special and zero size files:
> + *   inode_lock(ovl_inode)
> + *     lock_rename(workdir, upperdir)
> + *       copy up file to workdir (no data)
> + *       move file to upperdir
> + *
> + * Regular files with data:
> + *  inode_lock(ovl_inode)
> + *    lock_rename(workdir, upperdir)
> + *      copy up file to workdir (no data)
> + *    unlock_rename(workdir, upperdir)
> + *      copy data to file in workdir
> + *    lock_rename(workdir, upperdir)
> + *      move file to upperdir
> + */
>  static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
>  			      struct dentry *dentry, struct path *lowerpath,
>  			      struct kstat *stat, const char *link)
> @@ -274,16 +312,39 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
>  	if (err)
>  		goto out2;
>  
> -	if (S_ISREG(stat->mode)) {
> +	if (S_ISREG(stat->mode) && stat->size > 0) {
>  		struct path upperpath;
>  
>  		ovl_path_upper(dentry, &upperpath);
>  		BUG_ON(upperpath.dentry != NULL);
>  		upperpath.dentry = newdentry;
>  
> +		/*
> +		 * Release rename locks, because copy data may take a long time,
> +		 * and holding s_vfs_rename_mutex will block all rename and
> +		 * copy up operations on the entire underlying fs.
> +		 * We still hold the overlay inode lock to avoid concurrent
> +		 * copy up of this file.
> +		 */
> +		unlock_rename(workdir, upperdir);
> +
>  		err = ovl_copy_up_data(lowerpath, &upperpath, stat->size);
> +
> +		/*
> +		 * Re-aquire rename locks, before moving copied file into place.
> +		 */
> +		if (unlikely(lock_rename(workdir, upperdir) != NULL)) {
> +			pr_err("overlayfs: failed to re-aquire lock_rename\n");
> +			err = -EIO;
> +		}
> +
>  		if (err)
>  			goto out_cleanup;
> +
> +		if (WARN_ON(ovl_dentry_is_upper(dentry))) {
> +			/* Raced with another copy-up? This shouldn't happen */
> +			goto out_cleanup;
> +		}
>  	}
>  
>  	err = ovl_copy_xattr(lowerpath->dentry, newdentry);
> @@ -366,15 +427,14 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
>  			return PTR_ERR(link);
>  	}
>  
> -	err = -EIO;
> -	if (lock_rename(workdir, upperdir) != NULL) {
> +	if (unlikely(lock_rename(workdir, upperdir) != NULL)) {
>  		pr_err("overlayfs: failed to lock workdir+upperdir\n");
> +		err = -EIO;
>  		goto out_unlock;
>  	}
>  
>  	if (ovl_dentry_is_upper(dentry)) {
>  		/* Raced with another copy-up?  Nothing to do, then... */
> -		err = 0;
>  		goto out_unlock;
>  	}
>  
> @@ -433,11 +493,13 @@ static int __ovl_copy_up(struct dentry *dentry, int flags)
>  	return err;
>  }
>  
> +/* Called with dentry->d_inode lock held */
>  int ovl_copy_up(struct dentry *dentry)
>  {
>  	return __ovl_copy_up(dentry, 0);
>  }
>  
> +/* Called with dentry->d_inode lock held */
>  int ovl_copy_up_open(struct dentry *dentry, int flags)
>  {
>  	return __ovl_copy_up(dentry, flags);
> diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
> index 7abae00..532b0d5 100644
> --- a/fs/overlayfs/inode.c
> +++ b/fs/overlayfs/inode.c
> @@ -251,11 +251,14 @@ int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags)
>  
>  	type = ovl_path_real(dentry, &realpath);
>  	if (ovl_open_need_copy_up(file_flags, type, realpath.dentry)) {
> +		/* Take the overlay inode lock to avoid concurrent copy-up */
> +		inode_lock(dentry->d_inode);
>  		err = ovl_want_write(dentry);
>  		if (!err) {
>  			err = ovl_copy_up_open(dentry, file_flags);
>  			ovl_drop_write(dentry);
>  		}
> +		inode_unlock(dentry->d_inode);
>  	}
>  
>  	return err;
> -- 
> 2.7.4
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-unionfs" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-unionfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html