Re: [PATCH 14/19] VFS: Ensure no async updates happening in directory being removed.

Christian Brauner <brauner@xxxxxxxxxx> · Thu, 6 Feb 2025 15:06:35 +0100

On Thu, Feb 06, 2025 at 04:42:51PM +1100, NeilBrown wrote:
> vfs_rmdir takes an exclusive lock on the target directory to ensure
> nothing new is created in it while the rmdir progresses.  With the

It also excludes concurrent mount operations.

> possibility of async updates continuing after the inode lock is dropped
> we now need extra protection.
> 
> Any async updates will have DCACHE_PAR_UPDATE set on the dentry.  We
> simply wait for that flag to be cleared on all children.
> 
> Signed-off-by: NeilBrown <neilb@xxxxxxx>
> ---
>  fs/dcache.c |  2 +-
>  fs/namei.c  | 40 ++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 41 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/dcache.c b/fs/dcache.c
> index fb331596f1b1..90dee859d138 100644
> --- a/fs/dcache.c
> +++ b/fs/dcache.c
> @@ -53,7 +53,7 @@
>   *   - d_lru
>   *   - d_count
>   *   - d_unhashed()
> - *   - d_parent and d_chilren
> + *   - d_parent and d_children
>   *   - childrens' d_sib and d_parent
>   *   - d_u.d_alias, d_inode
>   *
> diff --git a/fs/namei.c b/fs/namei.c
> index 3a107d6098be..e8a85c9f431c 100644
> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -1839,6 +1839,27 @@ bool d_update_lock(struct dentry *dentry,
>  	return true;
>  }
>  
> +static void d_update_wait(struct dentry *dentry, unsigned int subclass)
> +{
> +	/* Note this may only ever be called in a context where we have
> +	 * a lock preventing this dentry from becoming locked, possibly
> +	 * an update lock on the parent dentry.  The must be a smp_mb()
> +	 * after that lock is taken and before this is called so that
> +	 * the following test is safe. d_update_lock() provides that
> +	 * barrier.
> +	 */
> +	if (!(dentry->d_flags & DCACHE_PAR_UPDATE))
> +		return
> +	lock_acquire_exclusive(&dentry->d_update_map, subclass,
> +			       0, NULL, _THIS_IP_);
> +	spin_lock(&dentry->d_lock);
> +	wait_var_event_spinlock(&dentry->d_flags,
> +				!check_dentry_locked(dentry),
> +				&dentry->d_lock);
> +	spin_unlock(&dentry->d_lock);
> +	lock_map_release(&dentry->d_update_map);
> +}
> +
>  bool d_update_trylock(struct dentry *dentry,
>  		      struct dentry *base,
>  		      const struct qstr *last)
> @@ -4688,6 +4709,7 @@ int vfs_rmdir(struct mnt_idmap *idmap, struct inode *dir,
>  		     struct dentry *dentry)
>  {
>  	int error = may_delete(idmap, dir, dentry, 1);
> +	struct dentry *child;
>  
>  	if (error)
>  		return error;
> @@ -4697,6 +4719,24 @@ int vfs_rmdir(struct mnt_idmap *idmap, struct inode *dir,
>  
>  	dget(dentry);
>  	inode_lock(dentry->d_inode);
> +	/*
> +	 * Some children of dentry might be active in an async update.
> +	 * We need to wait for them.  New children cannot be locked
> +	 * while the inode lock is held.
> +	 */
> +again:
> +	spin_lock(&dentry->d_lock);
> +	for (child = d_first_child(dentry); child;
> +	     child = d_next_sibling(child)) {
> +		if (child->d_flags & DCACHE_PAR_UPDATE) {
> +			dget(child);
> +			spin_unlock(&dentry->d_lock);
> +			d_update_wait(child, I_MUTEX_CHILD);
> +			dput(child);
> +			goto again;
> +		}
> +	}
> +	spin_unlock(&dentry->d_lock);

That looks like it can cause stalls when you call rmdir on a directory
that has a lots of children and a larg-ish subset of them has pending
async updates, no?