On Tue 01-10-13 18:00:35, Miklos Szeredi wrote: > From: Miklos Szeredi <mszeredi@xxxxxxx> > > Add new renameat2 syscall, which is the same as renameat with an added > flags argument. > > If flags is zero then this is a plain overwriting rename. If flags contain > RENAME_EXCHANGE then exchange source and destination files. There's no > restriction on the type of the files; e.g. a directory can be exchanged > with a symlink. It's not completely clear to me what should happen if RENAME_EXCHANGE is set but destination doesn't exist. Return -ENOENT or just do standard rename? Honza > > Signed-off-by: Miklos Szeredi <mszeredi@xxxxxxx> > --- > arch/x86/syscalls/syscall_64.tbl | 1 + > fs/dcache.c | 46 ++++++++++--- > fs/namei.c | 139 ++++++++++++++++++++++++++++++--------- > include/linux/dcache.h | 1 + > include/linux/fs.h | 2 + > include/uapi/linux/fs.h | 2 + > 6 files changed, 152 insertions(+), 39 deletions(-) > > diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl > index 38ae65d..fafd734 100644 > --- a/arch/x86/syscalls/syscall_64.tbl > +++ b/arch/x86/syscalls/syscall_64.tbl > @@ -320,6 +320,7 @@ > 311 64 process_vm_writev sys_process_vm_writev > 312 common kcmp sys_kcmp > 313 common finit_module sys_finit_module > +314 common renameat2 sys_renameat2 > > # > # x32-specific system call numbers start at 512 to avoid cache impact > diff --git a/fs/dcache.c b/fs/dcache.c > index 4100030..1735bac 100644 > --- a/fs/dcache.c > +++ b/fs/dcache.c > @@ -2495,12 +2495,14 @@ static void switch_names(struct dentry *dentry, struct dentry *target) > dentry->d_name.name = dentry->d_iname; > } else { > /* > - * Both are internal. Just copy target to dentry > + * Both are internal. > */ > - memcpy(dentry->d_iname, target->d_name.name, > - target->d_name.len + 1); > - dentry->d_name.len = target->d_name.len; > - return; > + unsigned int i; > + BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long))); > + for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) { > + swap(((long *) &dentry->d_iname)[i], > + ((long *) &target->d_iname)[i]); > + } > } > } > swap(dentry->d_name.len, target->d_name.len); > @@ -2557,13 +2559,15 @@ static void dentry_unlock_parents_for_move(struct dentry *dentry, > * __d_move - move a dentry > * @dentry: entry to move > * @target: new dentry > + * @exchange: exchange the two dentries > * > * Update the dcache to reflect the move of a file name. Negative > * dcache entries should not be moved in this way. Caller must hold > * rename_lock, the i_mutex of the source and target directories, > * and the sb->s_vfs_rename_mutex if they differ. See lock_rename(). > */ > -static void __d_move(struct dentry * dentry, struct dentry * target) > +static void __d_move(struct dentry *dentry, struct dentry *target, > + bool exchange) > { > if (!dentry->d_inode) > printk(KERN_WARNING "VFS: moving negative dcache entry\n"); > @@ -2587,6 +2591,10 @@ static void __d_move(struct dentry * dentry, struct dentry * target) > > /* Unhash the target: dput() will then get rid of it */ > __d_drop(target); > + if (exchange) { > + __d_rehash(target, > + d_hash(dentry->d_parent, dentry->d_name.hash)); > + } > > list_del(&dentry->d_u.d_child); > list_del(&target->d_u.d_child); > @@ -2613,6 +2621,8 @@ static void __d_move(struct dentry * dentry, struct dentry * target) > write_seqcount_end(&dentry->d_seq); > > dentry_unlock_parents_for_move(dentry, target); > + if (exchange) > + fsnotify_d_move(target); > spin_unlock(&target->d_lock); > fsnotify_d_move(dentry); > spin_unlock(&dentry->d_lock); > @@ -2630,11 +2640,31 @@ static void __d_move(struct dentry * dentry, struct dentry * target) > void d_move(struct dentry *dentry, struct dentry *target) > { > write_seqlock(&rename_lock); > - __d_move(dentry, target); > + __d_move(dentry, target, false); > write_sequnlock(&rename_lock); > } > EXPORT_SYMBOL(d_move); > > +/* > + * d_exchange - exchange two dentries > + * @dentry1: first dentry > + * @dentry2: second dentry > + */ > +void d_exchange(struct dentry *dentry1, struct dentry *dentry2) > +{ > + write_seqlock(&rename_lock); > + > + WARN_ON(!dentry1->d_inode); > + WARN_ON(!dentry2->d_inode); > + WARN_ON(IS_ROOT(dentry1)); > + WARN_ON(IS_ROOT(dentry2)); > + > + __d_move(dentry1, dentry2, true); > + > + write_sequnlock(&rename_lock); > +} > + > + > /** > * d_ancestor - search for an ancestor > * @p1: ancestor dentry > @@ -2682,7 +2712,7 @@ static struct dentry *__d_unalias(struct inode *inode, > m2 = &alias->d_parent->d_inode->i_mutex; > out_unalias: > if (likely(!d_mountpoint(alias))) { > - __d_move(alias, dentry); > + __d_move(alias, dentry, false); > ret = alias; > } > out_err: > diff --git a/fs/namei.c b/fs/namei.c > index 7ec6a12..55700b3 100644 > --- a/fs/namei.c > +++ b/fs/namei.c > @@ -3963,14 +3963,18 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname > * ->i_mutex on parents, which works but leads to some truly excessive > * locking]. > */ > -int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, > - struct inode *new_dir, struct dentry *new_dentry) > +static int vfs_rename2(struct inode *old_dir, struct dentry *old_dentry, > + struct inode *new_dir, struct dentry *new_dentry, > + unsigned int flags) > { > int error; > const unsigned char *old_name; > struct inode *source = old_dentry->d_inode; > struct inode *target = new_dentry->d_inode; > bool is_dir = S_ISDIR(source->i_mode); > + bool new_is_dir = target ? S_ISDIR(target->i_mode) : false; > + bool overwrite = !(flags & RENAME_EXCHANGE); > + unsigned max_links = new_dir->i_sb->s_max_links; > > if (source == target) > return 0; > @@ -3981,74 +3985,116 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, > > if (!target) > error = may_create(new_dir, new_dentry); > - else > + else if (overwrite) > error = may_delete(new_dir, new_dentry, is_dir); > + else > + error = may_delete(new_dir, new_dentry, new_is_dir); > if (error) > return error; > > - if (!old_dir->i_op->rename) > + if (!old_dir->i_op->rename && !old_dir->i_op->rename2) > return -EPERM; > > + if (flags && !old_dir->i_op->rename2) > + return -EOPNOTSUPP; > + > /* > * If we are going to change the parent - check write permissions, > * we'll need to flip '..'. > */ > - if (is_dir && new_dir != old_dir) { > - error = inode_permission(source, MAY_WRITE); > - if (error) > - return error; > + if (new_dir != old_dir) { > + if (is_dir) { > + error = inode_permission(source, MAY_WRITE); > + if (error) > + return error; > + } > + if (!overwrite && new_is_dir) { > + error = inode_permission(target, MAY_WRITE); > + if (error) > + return error; > + } > } > > error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); > if (error) > return error; > > + if (!overwrite) { > + error = security_inode_rename(new_dir, new_dentry, > + old_dir, old_dentry); > + if (error) > + return error; > + } > + > old_name = fsnotify_oldname_init(old_dentry->d_name.name); > dget(new_dentry); > - if (target) > + if (overwrite && target) > mutex_lock(&target->i_mutex); > > error = -EBUSY; > if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry)) > goto out; > > - if (is_dir) { > - unsigned max_links = new_dir->i_sb->s_max_links; > - > + if (max_links && new_dir != old_dir) { > error = -EMLINK; > - if (max_links && !target && new_dir != old_dir && > - new_dir->i_nlink >= max_links) > + if (is_dir && !new_is_dir && new_dir->i_nlink >= max_links) > goto out; > + if (!overwrite && !is_dir && new_is_dir && > + old_dir->i_nlink > max_links) > + goto out; > + } > + > + if (overwrite && is_dir && target) > + shrink_dcache_parent(new_dentry); > > - if (target) > - shrink_dcache_parent(new_dentry); > + if (old_dir->i_op->rename2) { > + error = old_dir->i_op->rename2(old_dir, old_dentry, > + new_dir, new_dentry, flags); > + } else { > + error = old_dir->i_op->rename(old_dir, old_dentry, > + new_dir, new_dentry); > } > > - error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); > if (error) > goto out; > > - if (target) { > + if (overwrite && target) { > if (is_dir) > target->i_flags |= S_DEAD; > dont_mount(new_dentry); > } > - if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) > - d_move(old_dentry, new_dentry); > + if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) { > + if (overwrite) > + d_move(old_dentry, new_dentry); > + else > + d_exchange(old_dentry, new_dentry); > + } > out: > - if (target) > + if (overwrite && target) > mutex_unlock(&target->i_mutex); > dput(new_dentry); > - if (!error) > + if (!error) { > fsnotify_move(old_dir, new_dir, old_name, is_dir, > - target, old_dentry); > + overwrite ? target : NULL, old_dentry); > + if (!overwrite) { > + fsnotify_move(new_dir, old_dir, old_dentry->d_name.name, > + new_is_dir, NULL, new_dentry); > + } > + } > fsnotify_oldname_free(old_name); > > return error; > } > > -SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, > - int, newdfd, const char __user *, newname) > +int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, > + struct inode *new_dir, struct dentry *new_dentry) > +{ > + return vfs_rename2(old_dir, old_dentry, new_dir, new_dentry, 0); > +} > + > + > +SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname, > + int, newdfd, const char __user *, newname, unsigned int, flags) > { > struct dentry *old_dir, *new_dir; > struct dentry *old_dentry, *new_dentry; > @@ -4058,7 +4104,13 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, > struct filename *to; > unsigned int lookup_flags = 0; > bool should_retry = false; > + bool overwrite = !(flags & RENAME_EXCHANGE); > int error; > + > + error = -EOPNOTSUPP; > + if (flags & ~RENAME_EXCHANGE) > + goto exit; > + > retry: > from = user_path_parent(olddfd, oldname, &oldnd, lookup_flags); > if (IS_ERR(from)) { > @@ -4091,7 +4143,8 @@ retry: > > oldnd.flags &= ~LOOKUP_PARENT; > newnd.flags &= ~LOOKUP_PARENT; > - newnd.flags |= LOOKUP_RENAME_TARGET; > + if (overwrite) > + newnd.flags |= LOOKUP_RENAME_TARGET; > > trap = lock_rename(new_dir, old_dir); > > @@ -4108,7 +4161,7 @@ retry: > error = -ENOTDIR; > if (oldnd.last.name[oldnd.last.len]) > goto exit4; > - if (newnd.last.name[newnd.last.len]) > + if (overwrite && newnd.last.name[newnd.last.len]) > goto exit4; > } > /* source should not be ancestor of target */ > @@ -4119,8 +4172,19 @@ retry: > error = PTR_ERR(new_dentry); > if (IS_ERR(new_dentry)) > goto exit4; > + if (!overwrite) { > + error = -ENOENT; > + if (!new_dentry->d_inode) > + goto exit4; > + > + if (!S_ISDIR(new_dentry->d_inode->i_mode)) { > + error = -ENOTDIR; > + if (newnd.last.name[newnd.last.len]) > + goto exit4; > + } > + } > /* target should not be an ancestor of source */ > - error = -ENOTEMPTY; > + error = overwrite ? -ENOTEMPTY : -EINVAL; > if (new_dentry == trap) > goto exit5; > > @@ -4128,8 +4192,15 @@ retry: > &newnd.path, new_dentry); > if (error) > goto exit5; > - error = vfs_rename(old_dir->d_inode, old_dentry, > - new_dir->d_inode, new_dentry); > + if (!overwrite) { > + error = security_path_rename(&newnd.path, new_dentry, > + &oldnd.path, old_dentry); > + if (error) > + goto exit5; > + } > + > + error = vfs_rename2(old_dir->d_inode, old_dentry, > + new_dir->d_inode, new_dentry, flags); > exit5: > dput(new_dentry); > exit4: > @@ -4154,9 +4225,15 @@ exit: > return error; > } > > +SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, > + int, newdfd, const char __user *, newname) > +{ > + return sys_renameat2(olddfd, oldname, newdfd, newname, 0); > +} > + > SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newname) > { > - return sys_renameat(AT_FDCWD, oldname, AT_FDCWD, newname); > + return sys_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0); > } > > int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link) > diff --git a/include/linux/dcache.h b/include/linux/dcache.h > index 59066e0..ce5ebed 100644 > --- a/include/linux/dcache.h > +++ b/include/linux/dcache.h > @@ -297,6 +297,7 @@ extern void dentry_update_name_case(struct dentry *, struct qstr *); > > /* used for rename() and baskets */ > extern void d_move(struct dentry *, struct dentry *); > +extern void d_exchange(struct dentry *, struct dentry *); > extern struct dentry *d_ancestor(struct dentry *, struct dentry *); > > /* appendix may either be NULL or be used for transname suffixes */ > diff --git a/include/linux/fs.h b/include/linux/fs.h > index 3f40547..71c6cf9 100644 > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -1572,6 +1572,8 @@ struct inode_operations { > int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t); > int (*rename) (struct inode *, struct dentry *, > struct inode *, struct dentry *); > + int (*rename2) (struct inode *, struct dentry *, > + struct inode *, struct dentry *, unsigned int); > int (*setattr) (struct dentry *, struct iattr *); > int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); > int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); > diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h > index 6c28b61..ebdafb6 100644 > --- a/include/uapi/linux/fs.h > +++ b/include/uapi/linux/fs.h > @@ -35,6 +35,8 @@ > #define SEEK_HOLE 4 /* seek to the next hole */ > #define SEEK_MAX SEEK_HOLE > > +#define RENAME_EXCHANGE (1 << 0) /* Exchange source and dest */ > + > struct fstrim_range { > __u64 start; > __u64 len; > -- > 1.8.1.4 > -- Jan Kara <jack@xxxxxxx> SUSE Labs, CR -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html