Re: [PATCH 3/7] vfs: add renameat2 syscall and cross-rename

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue 01-10-13 18:00:35, Miklos Szeredi wrote:
> From: Miklos Szeredi <mszeredi@xxxxxxx>
> 
> Add new renameat2 syscall, which is the same as renameat with an added
> flags argument.
> 
> If flags is zero then this is a plain overwriting rename.  If flags contain
> RENAME_EXCHANGE then exchange source and destination files.  There's no
> restriction on the type of the files; e.g. a directory can be exchanged
> with a symlink.
  It's not completely clear to me what should happen if RENAME_EXCHANGE is
set but the destination doesn't exist. Should it return -ENOENT or just do a
standard rename?

								Honza
> 
> Signed-off-by: Miklos Szeredi <mszeredi@xxxxxxx>
> ---
>  arch/x86/syscalls/syscall_64.tbl |   1 +
>  fs/dcache.c                      |  46 ++++++++++---
>  fs/namei.c                       | 139 ++++++++++++++++++++++++++++++---------
>  include/linux/dcache.h           |   1 +
>  include/linux/fs.h               |   2 +
>  include/uapi/linux/fs.h          |   2 +
>  6 files changed, 152 insertions(+), 39 deletions(-)
> 
> diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
> index 38ae65d..fafd734 100644
> --- a/arch/x86/syscalls/syscall_64.tbl
> +++ b/arch/x86/syscalls/syscall_64.tbl
> @@ -320,6 +320,7 @@
>  311	64	process_vm_writev	sys_process_vm_writev
>  312	common	kcmp			sys_kcmp
>  313	common	finit_module		sys_finit_module
> +314	common	renameat2		sys_renameat2
>  
>  #
>  # x32-specific system call numbers start at 512 to avoid cache impact
> diff --git a/fs/dcache.c b/fs/dcache.c
> index 4100030..1735bac 100644
> --- a/fs/dcache.c
> +++ b/fs/dcache.c
> @@ -2495,12 +2495,14 @@ static void switch_names(struct dentry *dentry, struct dentry *target)
>  			dentry->d_name.name = dentry->d_iname;
>  		} else {
>  			/*
> -			 * Both are internal.  Just copy target to dentry
> +			 * Both are internal.
>  			 */
> -			memcpy(dentry->d_iname, target->d_name.name,
> -					target->d_name.len + 1);
> -			dentry->d_name.len = target->d_name.len;
> -			return;
> +			unsigned int i;
> +			BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long)));
> +			for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) {
> +				swap(((long *) &dentry->d_iname)[i],
> +				     ((long *) &target->d_iname)[i]);
> +			}
>  		}
>  	}
>  	swap(dentry->d_name.len, target->d_name.len);
> @@ -2557,13 +2559,15 @@ static void dentry_unlock_parents_for_move(struct dentry *dentry,
>   * __d_move - move a dentry
>   * @dentry: entry to move
>   * @target: new dentry
> + * @exchange: exchange the two dentries
>   *
>   * Update the dcache to reflect the move of a file name. Negative
>   * dcache entries should not be moved in this way. Caller must hold
>   * rename_lock, the i_mutex of the source and target directories,
>   * and the sb->s_vfs_rename_mutex if they differ. See lock_rename().
>   */
> -static void __d_move(struct dentry * dentry, struct dentry * target)
> +static void __d_move(struct dentry *dentry, struct dentry *target,
> +		     bool exchange)
>  {
>  	if (!dentry->d_inode)
>  		printk(KERN_WARNING "VFS: moving negative dcache entry\n");
> @@ -2587,6 +2591,10 @@ static void __d_move(struct dentry * dentry, struct dentry * target)
>  
>  	/* Unhash the target: dput() will then get rid of it */
>  	__d_drop(target);
> +	if (exchange) {
> +		__d_rehash(target,
> +			   d_hash(dentry->d_parent, dentry->d_name.hash));
> +	}
>  
>  	list_del(&dentry->d_u.d_child);
>  	list_del(&target->d_u.d_child);
> @@ -2613,6 +2621,8 @@ static void __d_move(struct dentry * dentry, struct dentry * target)
>  	write_seqcount_end(&dentry->d_seq);
>  
>  	dentry_unlock_parents_for_move(dentry, target);
> +	if (exchange)
> +		fsnotify_d_move(target);
>  	spin_unlock(&target->d_lock);
>  	fsnotify_d_move(dentry);
>  	spin_unlock(&dentry->d_lock);
> @@ -2630,11 +2640,31 @@ static void __d_move(struct dentry * dentry, struct dentry * target)
>  void d_move(struct dentry *dentry, struct dentry *target)
>  {
>  	write_seqlock(&rename_lock);
> -	__d_move(dentry, target);
> +	__d_move(dentry, target, false);
>  	write_sequnlock(&rename_lock);
>  }
>  EXPORT_SYMBOL(d_move);
>  
> +/*
> + * d_exchange - exchange two dentries
> + * @dentry1: first dentry
> + * @dentry2: second dentry
> + */
> +void d_exchange(struct dentry *dentry1, struct dentry *dentry2)
> +{
> +	write_seqlock(&rename_lock);
> +
> +	WARN_ON(!dentry1->d_inode);
> +	WARN_ON(!dentry2->d_inode);
> +	WARN_ON(IS_ROOT(dentry1));
> +	WARN_ON(IS_ROOT(dentry2));
> +
> +	__d_move(dentry1, dentry2, true);
> +
> +	write_sequnlock(&rename_lock);
> +}
> +
> +
>  /**
>   * d_ancestor - search for an ancestor
>   * @p1: ancestor dentry
> @@ -2682,7 +2712,7 @@ static struct dentry *__d_unalias(struct inode *inode,
>  	m2 = &alias->d_parent->d_inode->i_mutex;
>  out_unalias:
>  	if (likely(!d_mountpoint(alias))) {
> -		__d_move(alias, dentry);
> +		__d_move(alias, dentry, false);
>  		ret = alias;
>  	}
>  out_err:
> diff --git a/fs/namei.c b/fs/namei.c
> index 7ec6a12..55700b3 100644
> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -3963,14 +3963,18 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname
>   *	   ->i_mutex on parents, which works but leads to some truly excessive
>   *	   locking].
>   */
> -int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
> -	       struct inode *new_dir, struct dentry *new_dentry)
> +static int vfs_rename2(struct inode *old_dir, struct dentry *old_dentry,
> +		       struct inode *new_dir, struct dentry *new_dentry,
> +		       unsigned int flags)
>  {
>  	int error;
>  	const unsigned char *old_name;
>  	struct inode *source = old_dentry->d_inode;
>  	struct inode *target = new_dentry->d_inode;
>  	bool is_dir = S_ISDIR(source->i_mode);
> +	bool new_is_dir = target ? S_ISDIR(target->i_mode) : false;
> +	bool overwrite = !(flags & RENAME_EXCHANGE);
> +	unsigned max_links = new_dir->i_sb->s_max_links;
>  
>  	if (source == target)
>  		return 0;
> @@ -3981,74 +3985,116 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
>  
>  	if (!target)
>  		error = may_create(new_dir, new_dentry);
> -	else
> +	else if (overwrite)
>  		error = may_delete(new_dir, new_dentry, is_dir);
> +	else
> +		error = may_delete(new_dir, new_dentry, new_is_dir);
>  	if (error)
>  		return error;
>  
> -	if (!old_dir->i_op->rename)
> +	if (!old_dir->i_op->rename && !old_dir->i_op->rename2)
>  		return -EPERM;
>  
> +	if (flags && !old_dir->i_op->rename2)
> +		return -EOPNOTSUPP;
> +
>  	/*
>  	 * If we are going to change the parent - check write permissions,
>  	 * we'll need to flip '..'.
>  	 */
> -	if (is_dir && new_dir != old_dir) {
> -		error = inode_permission(source, MAY_WRITE);
> -		if (error)
> -			return error;
> +	if (new_dir != old_dir) {
> +		if (is_dir) {
> +			error = inode_permission(source, MAY_WRITE);
> +			if (error)
> +				return error;
> +		}
> +		if (!overwrite && new_is_dir) {
> +			error = inode_permission(target, MAY_WRITE);
> +			if (error)
> +				return error;
> +		}
>  	}
>  
>  	error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry);
>  	if (error)
>  		return error;
>  
> +	if (!overwrite) {
> +		error = security_inode_rename(new_dir, new_dentry,
> +					      old_dir, old_dentry);
> +		if (error)
> +			return error;
> +	}
> +
>  	old_name = fsnotify_oldname_init(old_dentry->d_name.name);
>  	dget(new_dentry);
> -	if (target)
> +	if (overwrite && target)
>  		mutex_lock(&target->i_mutex);
>  
>  	error = -EBUSY;
>  	if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry))
>  		goto out;
>  
> -	if (is_dir) {
> -		unsigned max_links = new_dir->i_sb->s_max_links;
> -
> +	if (max_links && new_dir != old_dir) {
>  		error = -EMLINK;
> -		if (max_links && !target && new_dir != old_dir &&
> -		    new_dir->i_nlink >= max_links)
> +		if (is_dir && !new_is_dir && new_dir->i_nlink >= max_links)
>  			goto out;
> +		if (!overwrite && !is_dir && new_is_dir &&
> +		    old_dir->i_nlink > max_links)
> +			goto out;
> +	}
> +
> +	if (overwrite && is_dir && target)
> +		shrink_dcache_parent(new_dentry);
>  
> -		if (target)
> -			shrink_dcache_parent(new_dentry);
> +	if (old_dir->i_op->rename2) {
> +		error = old_dir->i_op->rename2(old_dir, old_dentry,
> +					       new_dir, new_dentry, flags);
> +	} else {
> +		error = old_dir->i_op->rename(old_dir, old_dentry,
> +					      new_dir, new_dentry);
>  	}
>  
> -	error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
>  	if (error)
>  		goto out;
>  
> -	if (target) {
> +	if (overwrite && target) {
>  		if (is_dir)
>  			target->i_flags |= S_DEAD;
>  		dont_mount(new_dentry);
>  	}
> -	if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
> -		d_move(old_dentry, new_dentry);
> +	if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) {
> +		if (overwrite)
> +			d_move(old_dentry, new_dentry);
> +		else
> +			d_exchange(old_dentry, new_dentry);
> +	}
>  out:
> -	if (target)
> +	if (overwrite && target)
>  		mutex_unlock(&target->i_mutex);
>  	dput(new_dentry);
> -	if (!error)
> +	if (!error) {
>  		fsnotify_move(old_dir, new_dir, old_name, is_dir,
> -			      target, old_dentry);
> +			      overwrite ? target : NULL, old_dentry);
> +		if (!overwrite) {
> +			fsnotify_move(new_dir, old_dir, old_dentry->d_name.name,
> +				      new_is_dir, NULL, new_dentry);
> +		}
> +	}
>  	fsnotify_oldname_free(old_name);
>  
>  	return error;
>  }
>  
> -SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
> -		int, newdfd, const char __user *, newname)
> +int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
> +	       struct inode *new_dir, struct dentry *new_dentry)
> +{
> +	return vfs_rename2(old_dir, old_dentry, new_dir, new_dentry, 0);
> +}
> +
> +
> +SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
> +		int, newdfd, const char __user *, newname, unsigned int, flags)
>  {
>  	struct dentry *old_dir, *new_dir;
>  	struct dentry *old_dentry, *new_dentry;
> @@ -4058,7 +4104,13 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
>  	struct filename *to;
>  	unsigned int lookup_flags = 0;
>  	bool should_retry = false;
> +	bool overwrite = !(flags & RENAME_EXCHANGE);
>  	int error;
> +
> +	error = -EOPNOTSUPP;
> +	if (flags & ~RENAME_EXCHANGE)
> +		goto exit;
> +
>  retry:
>  	from = user_path_parent(olddfd, oldname, &oldnd, lookup_flags);
>  	if (IS_ERR(from)) {
> @@ -4091,7 +4143,8 @@ retry:
>  
>  	oldnd.flags &= ~LOOKUP_PARENT;
>  	newnd.flags &= ~LOOKUP_PARENT;
> -	newnd.flags |= LOOKUP_RENAME_TARGET;
> +	if (overwrite)
> +		newnd.flags |= LOOKUP_RENAME_TARGET;
>  
>  	trap = lock_rename(new_dir, old_dir);
>  
> @@ -4108,7 +4161,7 @@ retry:
>  		error = -ENOTDIR;
>  		if (oldnd.last.name[oldnd.last.len])
>  			goto exit4;
> -		if (newnd.last.name[newnd.last.len])
> +		if (overwrite && newnd.last.name[newnd.last.len])
>  			goto exit4;
>  	}
>  	/* source should not be ancestor of target */
> @@ -4119,8 +4172,19 @@ retry:
>  	error = PTR_ERR(new_dentry);
>  	if (IS_ERR(new_dentry))
>  		goto exit4;
> +	if (!overwrite) {
> +		error = -ENOENT;
> +		if (!new_dentry->d_inode)
> +			goto exit4;
> +
> +		if (!S_ISDIR(new_dentry->d_inode->i_mode)) {
> +			error = -ENOTDIR;
> +			if (newnd.last.name[newnd.last.len])
> +				goto exit4;
> +		}
> +	}
>  	/* target should not be an ancestor of source */
> -	error = -ENOTEMPTY;
> +	error = overwrite ? -ENOTEMPTY : -EINVAL;
>  	if (new_dentry == trap)
>  		goto exit5;
>  
> @@ -4128,8 +4192,15 @@ retry:
>  				     &newnd.path, new_dentry);
>  	if (error)
>  		goto exit5;
> -	error = vfs_rename(old_dir->d_inode, old_dentry,
> -				   new_dir->d_inode, new_dentry);
> +	if (!overwrite) {
> +		error = security_path_rename(&newnd.path, new_dentry,
> +					     &oldnd.path, old_dentry);
> +		if (error)
> +			goto exit5;
> +	}
> +
> +	error = vfs_rename2(old_dir->d_inode, old_dentry,
> +			    new_dir->d_inode, new_dentry, flags);
>  exit5:
>  	dput(new_dentry);
>  exit4:
> @@ -4154,9 +4225,15 @@ exit:
>  	return error;
>  }
>  
> +SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
> +		int, newdfd, const char __user *, newname)
> +{
> +	return sys_renameat2(olddfd, oldname, newdfd, newname, 0);
> +}
> +
>  SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newname)
>  {
> -	return sys_renameat(AT_FDCWD, oldname, AT_FDCWD, newname);
> +	return sys_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
>  }
>  
>  int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link)
> diff --git a/include/linux/dcache.h b/include/linux/dcache.h
> index 59066e0..ce5ebed 100644
> --- a/include/linux/dcache.h
> +++ b/include/linux/dcache.h
> @@ -297,6 +297,7 @@ extern void dentry_update_name_case(struct dentry *, struct qstr *);
>  
>  /* used for rename() and baskets */
>  extern void d_move(struct dentry *, struct dentry *);
> +extern void d_exchange(struct dentry *, struct dentry *);
>  extern struct dentry *d_ancestor(struct dentry *, struct dentry *);
>  
>  /* appendix may either be NULL or be used for transname suffixes */
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 3f40547..71c6cf9 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1572,6 +1572,8 @@ struct inode_operations {
>  	int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t);
>  	int (*rename) (struct inode *, struct dentry *,
>  			struct inode *, struct dentry *);
> +	int (*rename2) (struct inode *, struct dentry *,
> +			struct inode *, struct dentry *, unsigned int);
>  	int (*setattr) (struct dentry *, struct iattr *);
>  	int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
>  	int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
> diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
> index 6c28b61..ebdafb6 100644
> --- a/include/uapi/linux/fs.h
> +++ b/include/uapi/linux/fs.h
> @@ -35,6 +35,8 @@
>  #define SEEK_HOLE	4	/* seek to the next hole */
>  #define SEEK_MAX	SEEK_HOLE
>  
> +#define RENAME_EXCHANGE	(1 << 0)	/* Exchange source and dest */
> +
>  struct fstrim_range {
>  	__u64 start;
>  	__u64 len;
> -- 
> 1.8.1.4
> 
-- 
Jan Kara <jack@xxxxxxx>
SUSE Labs, CR
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux