Re: [PATCH v2] vfs: fix link vs. rename race

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, 21 Feb 2022, Miklos Szeredi wrote:
> There has been a longstanding race condition between rename(2) and link(2),
> when those operations are done in parallel:
> 
> 1. Moving a file to an existing target file (eg. mv file target)
> 2. Creating a link from the target file to a third file (eg. ln target
>    link)
> 
> By the time vfs_link() locks the target inode, it might already be unlinked
> by rename.  This results in vfs_link() returning -ENOENT in order to
> prevent linking to already unlinked files.  This check was introduced in
> v2.6.39 by commit aae8a97d3ec3 ("fs: Don't allow to create hardlink for
> deleted file").
> 
> This breaks apparent atomicity of rename(2), which is described in
> standards and the man page:
> 
>     "If newpath already exists, it will be atomically replaced, so that
>      there is no point at which another process attempting to access
>      newpath will find it missing."
> 
> The simplest fix is to exclude renames for the complete link operation.

Alternately, lock the "from" directory as well as the "to" directory.
That would mean using lock_rename() and generally copying the structure
of do_renameat2() into do_linkat()

I wonder if you could get a similar race trying to create a file in
(empty directory) /tmp/foo while /tmp/bar was being renamed over it.

NeilBrown


> 
> This patch introduces a global rw_semaphore that is locked for read in
> rename and for write in link.  To prevent excessive contention, do not take
> the lock in link on the first try.  If the source of the link was found to
> be unlinked, then retry with the lock held.
> 
> Reuse the lock_rename()/unlock_rename() helpers for the rename part.  This
> however needs special treatment for stacking fs (overlayfs, ecryptfs) to
> prevent possible deadlocks.  Introduce [un]lock_rename_stacked() for this
> purpose.
> 
> Reproducer can be found at:
> 
>   https://lore.kernel.org/all/20220216131814.GA2463301@xavier-xps/
> 
> Reported-by: Xavier Roche <xavier.roche@xxxxxxxxxxx>
> Link: https://lore.kernel.org/all/20220214210708.GA2167841@xavier-xps/
> Fixes: aae8a97d3ec3 ("fs: Don't allow to create hardlink for deleted file")
> Tested-by: Xavier Roche <xavier.roche@xxxxxxxxxxx>
> Signed-off-by: Miklos Szeredi <mszeredi@xxxxxxxxxx>
> ---
>  fs/ecryptfs/inode.c    |  4 ++--
>  fs/namei.c             | 35 ++++++++++++++++++++++++++++++++---
>  fs/overlayfs/copy_up.c |  4 ++--
>  fs/overlayfs/dir.c     | 12 ++++++------
>  fs/overlayfs/util.c    |  4 ++--
>  include/linux/namei.h  |  4 ++++
>  6 files changed, 48 insertions(+), 15 deletions(-)
> 
> diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
> index 16d50dface59..f5c37599bd40 100644
> --- a/fs/ecryptfs/inode.c
> +++ b/fs/ecryptfs/inode.c
> @@ -596,7 +596,7 @@ ecryptfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
>  
>  	target_inode = d_inode(new_dentry);
>  
> -	trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
> +	trap = lock_rename_stacked(lower_old_dir_dentry, lower_new_dir_dentry);
>  	dget(lower_new_dentry);
>  	rc = -EINVAL;
>  	if (lower_old_dentry->d_parent != lower_old_dir_dentry)
> @@ -631,7 +631,7 @@ ecryptfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
>  		fsstack_copy_attr_all(old_dir, d_inode(lower_old_dir_dentry));
>  out_lock:
>  	dput(lower_new_dentry);
> -	unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
> +	unlock_rename_stacked(lower_old_dir_dentry, lower_new_dir_dentry);
>  	return rc;
>  }
>  
> diff --git a/fs/namei.c b/fs/namei.c
> index 3f1829b3ab5b..27e671c354a6 100644
> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -122,6 +122,8 @@
>   * PATH_MAX includes the nul terminator --RR.
>   */
>  
> +static DECLARE_RWSEM(link_rwsem);
> +
>  #define EMBEDDED_NAME_MAX	(PATH_MAX - offsetof(struct filename, iname))
>  
>  struct filename *
> @@ -2954,10 +2956,11 @@ static inline int may_create(struct user_namespace *mnt_userns,
>  	return inode_permission(mnt_userns, dir, MAY_WRITE | MAY_EXEC);
>  }
>  
> +
>  /*
>   * p1 and p2 should be directories on the same fs.
>   */
> -struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
> +struct dentry *lock_rename_stacked(struct dentry *p1, struct dentry *p2)
>  {
>  	struct dentry *p;
>  
> @@ -2986,9 +2989,16 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
>  	inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2);
>  	return NULL;
>  }
> +EXPORT_SYMBOL(lock_rename_stacked);
> +
> +struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
> +{
> +	down_read(&link_rwsem);
> +	return lock_rename_stacked(p1, p2);
> +}
>  EXPORT_SYMBOL(lock_rename);
>  
> -void unlock_rename(struct dentry *p1, struct dentry *p2)
> +void unlock_rename_stacked(struct dentry *p1, struct dentry *p2)
>  {
>  	inode_unlock(p1->d_inode);
>  	if (p1 != p2) {
> @@ -2996,6 +3006,13 @@ void unlock_rename(struct dentry *p1, struct dentry *p2)
>  		mutex_unlock(&p1->d_sb->s_vfs_rename_mutex);
>  	}
>  }
> +EXPORT_SYMBOL(unlock_rename_stacked);
> +
> +void unlock_rename(struct dentry *p1, struct dentry *p2)
> +{
> +	unlock_rename_stacked(p1, p2);
> +	up_read(&link_rwsem);
> +}
>  EXPORT_SYMBOL(unlock_rename);
>  
>  /**
> @@ -4456,6 +4473,7 @@ int do_linkat(int olddfd, struct filename *old, int newdfd,
>  	struct path old_path, new_path;
>  	struct inode *delegated_inode = NULL;
>  	int how = 0;
> +	bool lock = false;
>  	int error;
>  
>  	if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0) {
> @@ -4474,10 +4492,13 @@ int do_linkat(int olddfd, struct filename *old, int newdfd,
>  
>  	if (flags & AT_SYMLINK_FOLLOW)
>  		how |= LOOKUP_FOLLOW;
> +retry_lock:
> +	if (lock)
> +		down_write(&link_rwsem);
>  retry:
>  	error = filename_lookup(olddfd, old, how, &old_path, NULL);
>  	if (error)
> -		goto out_putnames;
> +		goto out_unlock_link;
>  
>  	new_dentry = filename_create(newdfd, new, &new_path,
>  					(how & LOOKUP_REVAL));
> @@ -4511,8 +4532,16 @@ int do_linkat(int olddfd, struct filename *old, int newdfd,
>  		how |= LOOKUP_REVAL;
>  		goto retry;
>  	}
> +	if (!lock && error == -ENOENT) {
> +		path_put(&old_path);
> +		lock = true;
> +		goto retry_lock;
> +	}
>  out_putpath:
>  	path_put(&old_path);
> +out_unlock_link:
> +	if (lock)
> +		up_write(&link_rwsem);
>  out_putnames:
>  	putname(old);
>  	putname(new);
> diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
> index e040970408d4..911c3cec43c2 100644
> --- a/fs/overlayfs/copy_up.c
> +++ b/fs/overlayfs/copy_up.c
> @@ -670,7 +670,7 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
>  
>  	/* workdir and destdir could be the same when copying up to indexdir */
>  	err = -EIO;
> -	if (lock_rename(c->workdir, c->destdir) != NULL)
> +	if (lock_rename_stacked(c->workdir, c->destdir) != NULL)
>  		goto unlock;
>  
>  	err = ovl_prep_cu_creds(c->dentry, &cc);
> @@ -711,7 +711,7 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
>  	if (S_ISDIR(inode->i_mode))
>  		ovl_set_flag(OVL_WHITEOUTS, inode);
>  unlock:
> -	unlock_rename(c->workdir, c->destdir);
> +	unlock_rename_stacked(c->workdir, c->destdir);
>  
>  	return err;
>  
> diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
> index f18490813170..fea397666174 100644
> --- a/fs/overlayfs/dir.c
> +++ b/fs/overlayfs/dir.c
> @@ -416,7 +416,7 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry,
>  
>  	ovl_cleanup_whiteouts(upper, list);
>  	ovl_cleanup(wdir, upper);
> -	unlock_rename(workdir, upperdir);
> +	unlock_rename_stacked(workdir, upperdir);
>  
>  	/* dentry's upper doesn't match now, get rid of it */
>  	d_drop(dentry);
> @@ -427,7 +427,7 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry,
>  	ovl_cleanup(wdir, opaquedir);
>  	dput(opaquedir);
>  out_unlock:
> -	unlock_rename(workdir, upperdir);
> +	unlock_rename_stacked(workdir, upperdir);
>  out:
>  	return ERR_PTR(err);
>  }
> @@ -551,7 +551,7 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
>  out_dput:
>  	dput(upper);
>  out_unlock:
> -	unlock_rename(workdir, upperdir);
> +	unlock_rename_stacked(workdir, upperdir);
>  out:
>  	if (!hardlink) {
>  		posix_acl_release(acl);
> @@ -790,7 +790,7 @@ static int ovl_remove_and_whiteout(struct dentry *dentry,
>  out_dput_upper:
>  	dput(upper);
>  out_unlock:
> -	unlock_rename(workdir, upperdir);
> +	unlock_rename_stacked(workdir, upperdir);
>  out_dput:
>  	dput(opaquedir);
>  out:
> @@ -1187,7 +1187,7 @@ static int ovl_rename(struct user_namespace *mnt_userns, struct inode *olddir,
>  		}
>  	}
>  
> -	trap = lock_rename(new_upperdir, old_upperdir);
> +	trap = lock_rename_stacked(new_upperdir, old_upperdir);
>  
>  	olddentry = lookup_one_len(old->d_name.name, old_upperdir,
>  				   old->d_name.len);
> @@ -1281,7 +1281,7 @@ static int ovl_rename(struct user_namespace *mnt_userns, struct inode *olddir,
>  out_dput_old:
>  	dput(olddentry);
>  out_unlock:
> -	unlock_rename(new_upperdir, old_upperdir);
> +	unlock_rename_stacked(new_upperdir, old_upperdir);
>  out_revert_creds:
>  	revert_creds(old_cred);
>  	if (update_nlink)
> diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
> index f48284a2a896..9358282278b1 100644
> --- a/fs/overlayfs/util.c
> +++ b/fs/overlayfs/util.c
> @@ -930,13 +930,13 @@ int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir)
>  		goto err;
>  
>  	/* Workdir should not be subdir of upperdir and vice versa */
> -	if (lock_rename(workdir, upperdir) != NULL)
> +	if (lock_rename_stacked(workdir, upperdir) != NULL)
>  		goto err_unlock;
>  
>  	return 0;
>  
>  err_unlock:
> -	unlock_rename(workdir, upperdir);
> +	unlock_rename_stacked(workdir, upperdir);
>  err:
>  	pr_err("failed to lock workdir+upperdir\n");
>  	return -EIO;
> diff --git a/include/linux/namei.h b/include/linux/namei.h
> index e89329bb3134..0a87bb0d56ce 100644
> --- a/include/linux/namei.h
> +++ b/include/linux/namei.h
> @@ -77,6 +77,10 @@ extern int follow_up(struct path *);
>  extern struct dentry *lock_rename(struct dentry *, struct dentry *);
>  extern void unlock_rename(struct dentry *, struct dentry *);
>  
> +/* Special version of the above for stacking filesystems */
> +extern struct dentry *lock_rename_stacked(struct dentry *, struct dentry *);
> +extern void unlock_rename_stacked(struct dentry *, struct dentry *);
> +
>  extern int __must_check nd_jump_link(struct path *path);
>  
>  static inline void nd_terminate_link(void *name, size_t len, size_t maxlen)
> -- 
> 2.34.1
> 
> 




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux