There are two scenarios that can result in an alias being found in d_splice_alias. The first scenario is a disconnected dentry being looked up and reconnected (common with nfs file handles, and open_by_handle_at). The second scenario is a lookup on a directory lazily discovering that it was renamed on the remote filesystem. The locking challenge for handling these two scenarios is that the instance of lookup calling d_splice_alias has already taken the dentry->d_parent->d_inode->i_mutex. If the mutex was not held the locking we could just do: rename_mutex = &inode->i_sb->s_vfs_rename_mutex; mutex_lock(rename_mutex); if (d_ancestor(alias, dentry)) { mutex_unlock(rename_mutex); pr_warn_ratelimited(...); return -ELOOP; } m1 = &dentry->d_parent->d_inode->i_mutex; mutex_lock_nested(m1, I_MUTEX_PARENT); if (!IS_ROOT(alias) && (alias->d_parent != dentry->d_parent)) { m2 = &alias->d_parent->d_inode->i_mutex; mutex_lock_nested(m2, I_MUTEX_PARENT2); } d_move(alias, dentry); if (m2) mutex_unlock(m2); mutex_unlock(m1); mutex_unlock(rename_mutex); Which is essentially lock_rename and unlock_reaname with added handling of the IS_ROOT case. The problem is that as the lookup locking stands today grabbing the s_vfs_rename_mutex must use mutex_trylock which can fail, so for reliability reasons we need to avoid using the rename_mutex as much as possible. For the case where a disconnected dentry is being connected (aka the IS_ROOT case) the d_ancestor test can be placed under the rename_lock removing any chance that taking locks will cause a failure. For the lazy rename case things are trickier because when dentry->d_parent and alias->d_parent are not equal the code need to take the s_vfs_rename_mutex, or risk the d_ancestor call in lock_rename being inaccurate. As s_vfs_rename_mutex is take first there are no locking ordering issues with taking alias->d_parent->d_inode->i_mutex. Furthermore as games with rename_lock are unnecessary d_move instead of __d_move can be called. Sleeping in d_splice_alias is not something new as security_d_instantiate is a sleeping function. Compared to the current implementation this change introduces a function for each case __d_rename_alias and __d_connect_alias and moves the acquisition of rename_lock down into those functions. In the case of __d_rename_alias which was __d_unalias this allows the existing lock ordering rules to be followed much more closely removing the need for a trylock when acquiring alias->d_parent->d_inode->i_mutex, and allowing the use of d_move. A common helper that prints the warning message when a loop is detected is factored out into d_alias_is_ancestor so that code does not need to be duplicated in both cases. Signed-off-by: "Eric W. Biederman" <ebiederm@xxxxxxxxxxxx> --- fs/dcache.c | 111 ++++++++++++++++++++++++++++++++++++------------------------ 1 file changed, 67 insertions(+), 44 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 53b7f1e63beb..c1eece74621f 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2711,41 +2711,77 @@ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2) return NULL; } +static struct dentry *d_alias_is_ancestor(struct dentry *alias, + struct dentry *dentry) +{ + struct dentry *ancestor = d_ancestor(alias, dentry); + if (ancestor) { + pr_warn_ratelimited( + "VFS: Lookup of '%s' in %s %s would have caused loop\n", + dentry->d_name.name, + dentry->d_sb->s_type->name, + dentry->d_sb->s_id); + } + return ancestor; +} + /* * This helper attempts to cope with remotely renamed directories * * It assumes that the caller is already holding - * dentry->d_parent->d_inode->i_mutex, and rename_lock + * dentry->d_parent->d_inode->i_mutex * * Note: If ever the locking in lock_rename() changes, then please * remember to update this too... */ -static int __d_unalias(struct inode *inode, - struct dentry *dentry, struct dentry *alias) +static int __d_rename_alias(struct dentry *alias, struct dentry *dentry) { - struct mutex *m1 = NULL, *m2 = NULL; - int ret = -ESTALE; + struct mutex *rename_mutex = NULL, *parent_mutex = NULL; - /* If alias and dentry share a parent, then no extra locks required */ - if (alias->d_parent == dentry->d_parent) - goto out_unalias; + /* Holding dentry->d_parent->d_inode->i_mutex guarantees this + * that the equality or inequality alias->d_parent and + * dentry->d_parent remains stable. + */ + if (alias->d_parent != dentry->d_parent) { + /* See lock_rename() */ + rename_mutex = &dentry->d_sb->s_vfs_rename_mutex; + if (!mutex_trylock(rename_mutex)) + return -ESTALE; + + if (d_alias_is_ancestor(alias, dentry)) { + mutex_unlock(rename_mutex); + return -ELOOP; + } + parent_mutex = &alias->d_parent->d_inode->i_mutex; + mutex_lock_nested(parent_mutex, I_MUTEX_PARENT2); + } + d_move(alias, dentry); + if (parent_mutex) { + mutex_unlock(parent_mutex); + mutex_unlock(rename_mutex); + } + return 0; +} - /* See lock_rename() */ - if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex)) - goto out_err; - m1 = &dentry->d_sb->s_vfs_rename_mutex; - if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex)) - goto out_err; - m2 = &alias->d_parent->d_inode->i_mutex; -out_unalias: +/* + * This helper connects disconnected dentries. + * + * It assumes that the caller is already holding + * dentry->d_parent->d_inode->i_mutex + * + */ +static int __d_connect_alias(struct inode *inode, + struct dentry *alias, struct dentry *dentry) +{ + write_seqlock(&rename_lock); + if (unlikely(d_alias_is_ancestor(alias, dentry))) { + write_sequnlock(&rename_lock); + return -ELOOP; + } __d_move(alias, dentry, false); - ret = 0; -out_err: - if (m2) - mutex_unlock(m2); - if (m1) - mutex_unlock(m1); - return ret; + write_sequnlock(&rename_lock); + security_d_instantiate(alias, inode); + return 0; } /** @@ -2786,30 +2822,17 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) if (S_ISDIR(inode->i_mode)) { struct dentry *new = __d_find_any_alias(inode); if (unlikely(new)) { + int err; /* The reference to new ensures it remains an alias */ spin_unlock(&inode->i_lock); - write_seqlock(&rename_lock); - if (unlikely(d_ancestor(new, dentry))) { - write_sequnlock(&rename_lock); + + if (!IS_ROOT(new)) + err = __d_rename_alias(new, dentry); + else + err = __d_connect_alias(inode, new, dentry); + if (err) { dput(new); - new = ERR_PTR(-ELOOP); - pr_warn_ratelimited( - "VFS: Lookup of '%s' in %s %s" - " would have caused loop\n", - dentry->d_name.name, - inode->i_sb->s_type->name, - inode->i_sb->s_id); - } else if (!IS_ROOT(new)) { - int err = __d_unalias(inode, dentry, new); - write_sequnlock(&rename_lock); - if (err) { - dput(new); - new = ERR_PTR(err); - } - } else { - __d_move(new, dentry, false); - write_sequnlock(&rename_lock); - security_d_instantiate(new, inode); + new = ERR_PTR(err); } iput(inode); return new; -- 2.2.1 _______________________________________________ Containers mailing list Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linuxfoundation.org/mailman/listinfo/containers