From: Miklos Szeredi <mszeredi@xxxxxxx> Add new renameat2 syscall, which is the same as renameat with an added flags argument. If flags is zero then this is a plain overwriting rename. If flags contains RENAME_EXCHANGE then exchange source and destination files. There's no restriction on the type of the files; e.g. a directory can be exchanged with a symlink. Signed-off-by: Miklos Szeredi <mszeredi@xxxxxxx> --- arch/x86/syscalls/syscall_64.tbl | 1 + fs/dcache.c | 46 ++++++++++--- fs/namei.c | 139 ++++++++++++++++++++++++++++++--------- include/linux/dcache.h | 1 + include/linux/fs.h | 2 + include/uapi/linux/fs.h | 2 + 6 files changed, 152 insertions(+), 39 deletions(-) diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index 38ae65d..fafd734 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -320,6 +320,7 @@ 311 64 process_vm_writev sys_process_vm_writev 312 common kcmp sys_kcmp 313 common finit_module sys_finit_module +314 common renameat2 sys_renameat2 # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/fs/dcache.c b/fs/dcache.c index 4100030..1735bac 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2495,12 +2495,14 @@ static void switch_names(struct dentry *dentry, struct dentry *target) dentry->d_name.name = dentry->d_iname; } else { /* - * Both are internal. Just copy target to dentry + * Both are internal. 
*/ - memcpy(dentry->d_iname, target->d_name.name, - target->d_name.len + 1); - dentry->d_name.len = target->d_name.len; - return; + unsigned int i; + BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long))); + for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) { + swap(((long *) &dentry->d_iname)[i], + ((long *) &target->d_iname)[i]); + } } } swap(dentry->d_name.len, target->d_name.len); @@ -2557,13 +2559,15 @@ static void dentry_unlock_parents_for_move(struct dentry *dentry, * __d_move - move a dentry * @dentry: entry to move * @target: new dentry + * @exchange: exchange the two dentries * * Update the dcache to reflect the move of a file name. Negative * dcache entries should not be moved in this way. Caller must hold * rename_lock, the i_mutex of the source and target directories, * and the sb->s_vfs_rename_mutex if they differ. See lock_rename(). */ -static void __d_move(struct dentry * dentry, struct dentry * target) +static void __d_move(struct dentry *dentry, struct dentry *target, + bool exchange) { if (!dentry->d_inode) printk(KERN_WARNING "VFS: moving negative dcache entry\n"); @@ -2587,6 +2591,10 @@ static void __d_move(struct dentry * dentry, struct dentry * target) /* Unhash the target: dput() will then get rid of it */ __d_drop(target); + if (exchange) { + __d_rehash(target, + d_hash(dentry->d_parent, dentry->d_name.hash)); + } list_del(&dentry->d_u.d_child); list_del(&target->d_u.d_child); @@ -2613,6 +2621,8 @@ static void __d_move(struct dentry * dentry, struct dentry * target) write_seqcount_end(&dentry->d_seq); dentry_unlock_parents_for_move(dentry, target); + if (exchange) + fsnotify_d_move(target); spin_unlock(&target->d_lock); fsnotify_d_move(dentry); spin_unlock(&dentry->d_lock); @@ -2630,11 +2640,31 @@ static void __d_move(struct dentry * dentry, struct dentry * target) void d_move(struct dentry *dentry, struct dentry *target) { write_seqlock(&rename_lock); - __d_move(dentry, target); + __d_move(dentry, target, false); 
write_sequnlock(&rename_lock); } EXPORT_SYMBOL(d_move); +/* + * d_exchange - exchange two dentries + * @dentry1: first dentry + * @dentry2: second dentry + */ +void d_exchange(struct dentry *dentry1, struct dentry *dentry2) +{ + write_seqlock(&rename_lock); + + WARN_ON(!dentry1->d_inode); + WARN_ON(!dentry2->d_inode); + WARN_ON(IS_ROOT(dentry1)); + WARN_ON(IS_ROOT(dentry2)); + + __d_move(dentry1, dentry2, true); + + write_sequnlock(&rename_lock); +} + + /** * d_ancestor - search for an ancestor * @p1: ancestor dentry @@ -2682,7 +2712,7 @@ static struct dentry *__d_unalias(struct inode *inode, m2 = &alias->d_parent->d_inode->i_mutex; out_unalias: if (likely(!d_mountpoint(alias))) { - __d_move(alias, dentry); + __d_move(alias, dentry, false); ret = alias; } out_err: diff --git a/fs/namei.c b/fs/namei.c index 7ec6a12..55700b3 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3963,14 +3963,18 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname * ->i_mutex on parents, which works but leads to some truly excessive * locking]. */ -int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry) +static int vfs_rename2(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) { int error; const unsigned char *old_name; struct inode *source = old_dentry->d_inode; struct inode *target = new_dentry->d_inode; bool is_dir = S_ISDIR(source->i_mode); + bool new_is_dir = target ? 
S_ISDIR(target->i_mode) : false; + bool overwrite = !(flags & RENAME_EXCHANGE); + unsigned max_links = new_dir->i_sb->s_max_links; if (source == target) return 0; @@ -3981,74 +3985,116 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (!target) error = may_create(new_dir, new_dentry); - else + else if (overwrite) error = may_delete(new_dir, new_dentry, is_dir); + else + error = may_delete(new_dir, new_dentry, new_is_dir); if (error) return error; - if (!old_dir->i_op->rename) + if (!old_dir->i_op->rename && !old_dir->i_op->rename2) return -EPERM; + if (flags && !old_dir->i_op->rename2) + return -EOPNOTSUPP; + /* * If we are going to change the parent - check write permissions, * we'll need to flip '..'. */ - if (is_dir && new_dir != old_dir) { - error = inode_permission(source, MAY_WRITE); - if (error) - return error; + if (new_dir != old_dir) { + if (is_dir) { + error = inode_permission(source, MAY_WRITE); + if (error) + return error; + } + if (!overwrite && new_is_dir) { + error = inode_permission(target, MAY_WRITE); + if (error) + return error; + } } error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); if (error) return error; + if (!overwrite) { + error = security_inode_rename(new_dir, new_dentry, + old_dir, old_dentry); + if (error) + return error; + } + old_name = fsnotify_oldname_init(old_dentry->d_name.name); dget(new_dentry); - if (target) + if (overwrite && target) mutex_lock(&target->i_mutex); error = -EBUSY; if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry)) goto out; - if (is_dir) { - unsigned max_links = new_dir->i_sb->s_max_links; - + if (max_links && new_dir != old_dir) { error = -EMLINK; - if (max_links && !target && new_dir != old_dir && - new_dir->i_nlink >= max_links) + if (is_dir && !new_is_dir && new_dir->i_nlink >= max_links) goto out; + if (!overwrite && !is_dir && new_is_dir && + old_dir->i_nlink > max_links) + goto out; + } + + if (overwrite && is_dir && target) + 
shrink_dcache_parent(new_dentry); - if (target) - shrink_dcache_parent(new_dentry); + if (old_dir->i_op->rename2) { + error = old_dir->i_op->rename2(old_dir, old_dentry, + new_dir, new_dentry, flags); + } else { + error = old_dir->i_op->rename(old_dir, old_dentry, + new_dir, new_dentry); } - error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); if (error) goto out; - if (target) { + if (overwrite && target) { if (is_dir) target->i_flags |= S_DEAD; dont_mount(new_dentry); } - if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) - d_move(old_dentry, new_dentry); + if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) { + if (overwrite) + d_move(old_dentry, new_dentry); + else + d_exchange(old_dentry, new_dentry); + } out: - if (target) + if (overwrite && target) mutex_unlock(&target->i_mutex); dput(new_dentry); - if (!error) + if (!error) { fsnotify_move(old_dir, new_dir, old_name, is_dir, - target, old_dentry); + overwrite ? target : NULL, old_dentry); + if (!overwrite) { + fsnotify_move(new_dir, old_dir, old_dentry->d_name.name, + new_is_dir, NULL, new_dentry); + } + } fsnotify_oldname_free(old_name); return error; } -SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, - int, newdfd, const char __user *, newname) +int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + return vfs_rename2(old_dir, old_dentry, new_dir, new_dentry, 0); +} + + +SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname, + int, newdfd, const char __user *, newname, unsigned int, flags) { struct dentry *old_dir, *new_dir; struct dentry *old_dentry, *new_dentry; @@ -4058,7 +4104,13 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, struct filename *to; unsigned int lookup_flags = 0; bool should_retry = false; + bool overwrite = !(flags & RENAME_EXCHANGE); int error; + + error = -EOPNOTSUPP; + if (flags & ~RENAME_EXCHANGE) + goto exit; + 
retry: from = user_path_parent(olddfd, oldname, &oldnd, lookup_flags); if (IS_ERR(from)) { @@ -4091,7 +4143,8 @@ retry: oldnd.flags &= ~LOOKUP_PARENT; newnd.flags &= ~LOOKUP_PARENT; - newnd.flags |= LOOKUP_RENAME_TARGET; + if (overwrite) + newnd.flags |= LOOKUP_RENAME_TARGET; trap = lock_rename(new_dir, old_dir); @@ -4108,7 +4161,7 @@ retry: error = -ENOTDIR; if (oldnd.last.name[oldnd.last.len]) goto exit4; - if (newnd.last.name[newnd.last.len]) + if (overwrite && newnd.last.name[newnd.last.len]) goto exit4; } /* source should not be ancestor of target */ @@ -4119,8 +4172,19 @@ retry: error = PTR_ERR(new_dentry); if (IS_ERR(new_dentry)) goto exit4; + if (!overwrite) { + error = -ENOENT; + if (!new_dentry->d_inode) + goto exit4; + + if (!S_ISDIR(new_dentry->d_inode->i_mode)) { + error = -ENOTDIR; + if (newnd.last.name[newnd.last.len]) + goto exit4; + } + } /* target should not be an ancestor of source */ - error = -ENOTEMPTY; + error = overwrite ? -ENOTEMPTY : -EINVAL; if (new_dentry == trap) goto exit5; @@ -4128,8 +4192,15 @@ retry: &newnd.path, new_dentry); if (error) goto exit5; - error = vfs_rename(old_dir->d_inode, old_dentry, - new_dir->d_inode, new_dentry); + if (!overwrite) { + error = security_path_rename(&newnd.path, new_dentry, + &oldnd.path, old_dentry); + if (error) + goto exit5; + } + + error = vfs_rename2(old_dir->d_inode, old_dentry, + new_dir->d_inode, new_dentry, flags); exit5: dput(new_dentry); exit4: @@ -4154,9 +4225,15 @@ exit: return error; } +SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, + int, newdfd, const char __user *, newname) +{ + return sys_renameat2(olddfd, oldname, newdfd, newname, 0); +} + SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newname) { - return sys_renameat(AT_FDCWD, oldname, AT_FDCWD, newname); + return sys_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0); } int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link) diff --git 
a/include/linux/dcache.h b/include/linux/dcache.h index 59066e0..ce5ebed 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -297,6 +297,7 @@ extern void dentry_update_name_case(struct dentry *, struct qstr *); /* used for rename() and baskets */ extern void d_move(struct dentry *, struct dentry *); +extern void d_exchange(struct dentry *, struct dentry *); extern struct dentry *d_ancestor(struct dentry *, struct dentry *); /* appendix may either be NULL or be used for transname suffixes */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 3f40547..71c6cf9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1572,6 +1572,8 @@ struct inode_operations { int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t); int (*rename) (struct inode *, struct dentry *, struct inode *, struct dentry *); + int (*rename2) (struct inode *, struct dentry *, + struct inode *, struct dentry *, unsigned int); int (*setattr) (struct dentry *, struct iattr *); int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 6c28b61..ebdafb6 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -35,6 +35,8 @@ #define SEEK_HOLE 4 /* seek to the next hole */ #define SEEK_MAX SEEK_HOLE +#define RENAME_EXCHANGE (1 << 0) /* Exchange source and dest */ + struct fstrim_range { __u64 start; __u64 len; -- 1.8.1.4 -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html