lookup_and_lock_rename() combines locking and lookup for two names. It uses the new lock_two_directories_shared() if that is appropriate for the filesystem. unlock_rename_shared() does either a shared unlock or an exclusive unlock depending on how the filesystem wants rename to be handled. lookup_and_lock_rename_one() and done_lookup_and_lock_rename() are exported for other modules to use. As a rename can continue asynchronously after the inode lock is dropped, lock_two_directories() and lock_two_directories_shared() must ensure that is not happening before looking at ->d_parent. This requires a call to d_update_wait(). Note that if the dentry is locked for update, it must be a rename. It cannot be a create or a (successful) rmdir as these dentries are not empty - except possibly the target directory, but waiting for the rmdir there is still needed of course. Signed-off-by: NeilBrown <neilb@xxxxxxx> --- fs/namei.c | 230 +++++++++++++++++++++++++++++++++++------- include/linux/namei.h | 7 ++ 2 files changed, 199 insertions(+), 38 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index c7b7445c770e..771e9d7b620c 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3451,8 +3451,14 @@ static struct dentry *lock_two_directories(struct dentry *p1, struct dentry *p2) { struct dentry *p = p1, *q = p2, *r; - while ((r = p->d_parent) != p2 && r != p) + /* Ensure d_update_wait() tests are safe - one barrier for all */ + smp_mb(); + + d_update_wait(p, I_MUTEX_NORMAL); + while ((r = p->d_parent) != p2 && r != p) { p = r; + d_update_wait(p, I_MUTEX_NORMAL); + } if (r == p2) { // p is a child of p2 and an ancestor of p1 or p1 itself inode_lock_nested(p2->d_inode, I_MUTEX_PARENT); @@ -3461,8 +3467,11 @@ static struct dentry *lock_two_directories(struct dentry *p1, struct dentry *p2) } // p is the root of connected component that contains p1 // p2 does not occur on the path from p to p1 - while ((r = q->d_parent) != p1 && r != p && r != q) + d_update_wait(q, I_MUTEX_NORMAL); + while ((r = 
q->d_parent) != p1 && r != p && r != q) { q = r; + d_update_wait(q, I_MUTEX_NORMAL); + } if (r == p1) { // q is a child of p1 and an ancestor of p2 or p2 itself inode_lock_nested(p1->d_inode, I_MUTEX_PARENT); @@ -3479,6 +3488,46 @@ static struct dentry *lock_two_directories(struct dentry *p1, struct dentry *p2) } } +static struct dentry *lock_two_directories_shared(struct dentry *p1, struct dentry *p2) +{ + struct dentry *p = p1, *q = p2, *r; + + /* Ensure d_update_wait() tests are safe - one barrier for all */ + smp_mb(); + + d_update_wait(p1, I_MUTEX_NORMAL); + while ((r = p->d_parent) != p2 && r != p) { + p = r; + d_update_wait(p, I_MUTEX_NORMAL); + } + if (r == p2) { + // p is a child of p2 and an ancestor of p1 or p1 itself + inode_lock_shared_nested(p2->d_inode, I_MUTEX_PARENT); + inode_lock_shared_nested(p1->d_inode, I_MUTEX_PARENT2); + return p; + } + // p is the root of connected component that contains p1 + // p2 does not occur on the path from p to p1 + d_update_wait(q, I_MUTEX_NORMAL); + while ((r = q->d_parent) != p1 && r != p && r != q) { + q = r; + d_update_wait(q, I_MUTEX_NORMAL); + } + if (r == p1) { + // q is a child of p1 and an ancestor of p2 or p2 itself + inode_lock_shared_nested(p1->d_inode, I_MUTEX_PARENT); + inode_lock_shared_nested(p2->d_inode, I_MUTEX_PARENT2); + return q; + } else if (likely(r == p)) { + // both p2 and p1 are descendents of p + inode_lock_shared_nested(p1->d_inode, I_MUTEX_PARENT); + inode_lock_shared_nested(p2->d_inode, I_MUTEX_PARENT2); + return NULL; + } else { // no common ancestor at the time we'd been called + return ERR_PTR(-EXDEV); + } +} + /* * p1 and p2 should be directories on the same fs. 
*/ @@ -3494,6 +3543,134 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2) } EXPORT_SYMBOL(lock_rename); +static void unlock_rename_shared(struct dentry *p1, struct dentry *p2) +{ + if (!(p1->d_inode->i_flags & S_ASYNC_RENAME)) + unlock_rename(p1, p2); + else { + inode_unlock_shared(p1->d_inode); + if (p1 != p2) { + inode_unlock_shared(p2->d_inode); + mutex_unlock(&p1->d_sb->s_vfs_rename_mutex); + } + } +} + +static int +lookup_and_lock_rename(struct dentry *p1, struct dentry *p2, + struct dentry **d1p, struct dentry **d2p, + struct qstr *last1, struct qstr *last2, + unsigned int flags1, unsigned int flags2) +{ + struct dentry *p = NULL; + struct dentry *d1, *d2; + bool ok1, ok2; + + if (p1->d_inode->i_flags & S_ASYNC_RENAME) { + if (p1 == p2) { + /* same parent - only one parent lock needed and + * no s_vfs_rename_mutex */ + inode_lock_shared_nested(p1->d_inode, I_MUTEX_PARENT); + } else { + mutex_lock(&p1->d_sb->s_vfs_rename_mutex); + + p = lock_two_directories_shared(p1, p2); + if (IS_ERR(p)) { + mutex_unlock(&p1->d_sb->s_vfs_rename_mutex); + return PTR_ERR(p); + } + } + } else + lock_rename(p1, p2); +retry: + d1 = lookup_one_qstr(last1, p1, flags1); + if (IS_ERR(d1)) + goto out_unlock_1; + d2 = lookup_one_qstr(last2, p2, flags2); + if (IS_ERR(d2)) + goto out_unlock_2; + + if (d1 == p) { + dput(d1); dput(d2); + unlock_rename_shared(p1, p2); + if (flags1 & LOOKUP_CREATE) + return -EINVAL; + else + return -ENOTEMPTY; + } + + if (d2 == p) { + dput(d1); dput(d2); + unlock_rename_shared(p1, p2); + if (flags2 & LOOKUP_CREATE) + return -EINVAL; + else + return -ENOTEMPTY; + } + + if (d1 < d2) { + ok1 = d_update_lock(d1, p1, last1, I_MUTEX_PARENT); + ok2 = d_update_lock(d2, p2, last2, I_MUTEX_PARENT2); + } else if (d1 > d2) { + ok2 = d_update_lock(d2, p2, last2, I_MUTEX_PARENT); + ok1 = d_update_lock(d1, p1, last1, I_MUTEX_PARENT2); + } else { + ok1 = ok2 = d_update_lock(d1, p1, last1, I_MUTEX_PARENT); + } + if (!ok1 || !ok2) { + if (ok1) + 
d_update_unlock(d1); + if (ok2 && d2 != d1) + d_update_unlock(d2); + dput(d1); + dput(d2); + goto retry; + } + *d1p = d1; *d2p = d2; + return 0; + +out_unlock_2: + dput(d1); + d1 = d2; +out_unlock_1: + unlock_rename_shared(p1, p2); + return PTR_ERR(d1); +} + +int lookup_and_lock_rename_one(struct dentry *p1, struct dentry *p2, + struct dentry **d1p, struct dentry **d2p, + const char *name1, int nlen1, + const char *name2, int nlen2, + unsigned int flags1, unsigned int flags2) +{ + struct qstr this1, this2; + int err; + + err = lookup_one_common(&nop_mnt_idmap, name1, p1, nlen1, &this1); + if (err) + return err; + err = lookup_one_common(&nop_mnt_idmap, name2, p2, nlen2, &this2); + if (err) + return err; + return lookup_and_lock_rename(p1, p2, d1p, d2p, &this1, &this2, + flags1, flags2); +} +EXPORT_SYMBOL(lookup_and_lock_rename_one); + +void done_lookup_and_lock_rename(struct dentry *p1, struct dentry *p2, + struct dentry *d1, struct dentry *d2) +{ + d_lookup_done(d1); + d_lookup_done(d2); + d_update_unlock(d1); + if (d2 != d1) + d_update_unlock(d2); + unlock_rename_shared(p1, p2); + dput(d1); + dput(d2); +} +EXPORT_SYMBOL(done_lookup_and_lock_rename); + /* * c1 and p2 should be on the same fs. 
*/ @@ -5497,7 +5674,6 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd, { struct renamedata rd; struct dentry *old_dentry, *new_dentry; - struct dentry *trap; struct path old_path, new_path; struct qstr old_last, new_last; int old_type, new_type; @@ -5548,51 +5724,33 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd, goto exit2; retry_deleg: - trap = lock_rename(new_path.dentry, old_path.dentry); - if (IS_ERR(trap)) { - error = PTR_ERR(trap); + error = lookup_and_lock_rename(old_path.dentry, new_path.dentry, + &old_dentry, &new_dentry, + &old_last, &new_last, + lookup_flags, lookup_flags | target_flags); + if (error) goto exit_lock_rename; - } - old_dentry = lookup_one_qstr(&old_last, old_path.dentry, - lookup_flags); - error = PTR_ERR(old_dentry); - if (IS_ERR(old_dentry)) - goto exit3; - new_dentry = lookup_one_qstr(&new_last, new_path.dentry, - lookup_flags | target_flags); - error = PTR_ERR(new_dentry); - if (IS_ERR(new_dentry)) - goto exit4; if (flags & RENAME_EXCHANGE) { if (!d_is_dir(new_dentry)) { error = -ENOTDIR; if (new_last.name[new_last.len]) - goto exit5; + goto exit_unlock; } } /* unless the source is a directory trailing slashes give -ENOTDIR */ if (!d_is_dir(old_dentry)) { error = -ENOTDIR; if (old_last.name[old_last.len]) - goto exit5; + goto exit_unlock; if (!(flags & RENAME_EXCHANGE) && new_last.name[new_last.len]) - goto exit5; - } - /* source should not be ancestor of target */ - error = -EINVAL; - if (old_dentry == trap) - goto exit5; - /* target should not be an ancestor of source */ - if (!(flags & RENAME_EXCHANGE)) - error = -ENOTEMPTY; - if (new_dentry == trap) - goto exit5; + goto exit_unlock; + } error = security_path_rename(&old_path, old_dentry, &new_path, new_dentry, flags); if (error) - goto exit5; + goto exit_unlock; rd.old_dir = old_path.dentry->d_inode; rd.old_dentry = old_dentry; @@ -5603,13 +5761,9 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd, rd.delegated_inode = 
&delegated_inode; rd.flags = flags; error = vfs_rename(&rd); -exit5: - d_lookup_done(new_dentry); - dput(new_dentry); -exit4: - dput(old_dentry); -exit3: - unlock_rename(new_path.dentry, old_path.dentry); +exit_unlock: + done_lookup_and_lock_rename(new_path.dentry, old_path.dentry, + new_dentry, old_dentry); exit_lock_rename: if (delegated_inode) { error = break_deleg_wait(&delegated_inode); diff --git a/include/linux/namei.h b/include/linux/namei.h index 72e351640406..8ef7aa6ed64c 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -104,6 +104,13 @@ extern int follow_up(struct path *); extern struct dentry *lock_rename(struct dentry *, struct dentry *); extern struct dentry *lock_rename_child(struct dentry *, struct dentry *); extern void unlock_rename(struct dentry *, struct dentry *); +int lookup_and_lock_rename_one(struct dentry *p1, struct dentry *p2, + struct dentry **d1p, struct dentry **d2p, + const char *name1, int nlen1, + const char *name2, int nlen2, + unsigned int flags1, unsigned int flags2); +void done_lookup_and_lock_rename(struct dentry *p1, struct dentry *p2, + struct dentry *d1, struct dentry *d2); /** * mode_strip_umask - handle vfs umask stripping -- 2.47.1