On Tue, Jun 04, 2019 at 02:50:27PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
> 
> Create a pwork destroy function that uses polling instead of
> uninterruptible sleep to wait for work items to finish so that we can
> touch the softlockup watchdog.  IOWs, gross hack.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
> ---

Seems reasonable given the quirky circumstances of quotacheck. Just a
couple nits..

>  fs/xfs/xfs_iwalk.c |    3 +++
>  fs/xfs/xfs_iwalk.h |    3 ++-
>  fs/xfs/xfs_pwork.c |   21 +++++++++++++++++++++
>  fs/xfs/xfs_pwork.h |    2 ++
>  fs/xfs/xfs_qm.c    |    2 +-
>  5 files changed, 29 insertions(+), 2 deletions(-)
> 
> 
> diff --git a/fs/xfs/xfs_iwalk.c b/fs/xfs/xfs_iwalk.c
> index 71ee1628aa70..c4a9c4c246b7 100644
> --- a/fs/xfs/xfs_iwalk.c
> +++ b/fs/xfs/xfs_iwalk.c
> @@ -526,6 +526,7 @@ xfs_iwalk_threaded(
>  	xfs_ino_t		startino,
>  	xfs_iwalk_fn		iwalk_fn,
>  	unsigned int		max_prefetch,
> +	bool			polled,
>  	void			*data)
>  {
>  	struct xfs_pwork_ctl	pctl;
> @@ -556,5 +557,7 @@ xfs_iwalk_threaded(
>  		startino = XFS_AGINO_TO_INO(mp, agno + 1, 0);
>  	}
>  
> +	if (polled)
> +		return xfs_pwork_destroy_poll(&pctl);
>  	return xfs_pwork_destroy(&pctl);

Rather than have duplicate destruction paths, could we rework
xfs_pwork_destroy_poll() to something like xfs_pwork_poll() that just
does the polling and returns, then the caller can fall back into the
current xfs_pwork_destroy()? I.e., this ends up looking like:

	...
	/* poll to keep soft lockup watchdog quiet */
	if (polled)
		xfs_pwork_poll(&pctl);
	return xfs_pwork_destroy(&pctl);

>  }

...

> diff --git a/fs/xfs/xfs_pwork.c b/fs/xfs/xfs_pwork.c
> index 19605a3a2482..3b885e0b52ac 100644
> --- a/fs/xfs/xfs_pwork.c
> +++ b/fs/xfs/xfs_pwork.c

...

> @@ -97,6 +101,23 @@ xfs_pwork_destroy(
>  	return pctl->error;
>  }
>  
> +/*
> + * Wait for the work to finish and tear down the control structure.
> + * Continually poll completion status and touch the soft lockup watchdog.
> + * This is for things like mount that hold locks.
> + */
> +int
> +xfs_pwork_destroy_poll(
> +	struct xfs_pwork_ctl	*pctl)
> +{
> +	while (atomic_read(&pctl->nr_work) > 0) {
> +		msleep(1);
> +		touch_softlockup_watchdog();
> +	}

Any idea what the typical watchdog timeout is? I'm curious where the
1ms comes from and whether we could get away with anything larger. I
realize that might introduce mount latency with the current sleep based
implementation, but we could also consider a waitqueue and using
something like wait_event_timeout() to schedule out for longer time
periods and still wake up immediately when the count drops to 0.
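
To be a little more concrete about the first point, xfs_pwork_poll()
would carry nothing but the polling loop that currently lives in
xfs_pwork_destroy_poll() (totally untested, just to show the shape of
it):

	/* Poll (and poke the watchdog) until all queued work has finished. */
	void
	xfs_pwork_poll(
		struct xfs_pwork_ctl	*pctl)
	{
		while (atomic_read(&pctl->nr_work) > 0) {
			msleep(1);
			touch_softlockup_watchdog();
		}
	}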
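
For the waitqueue variant, assuming we added a wait_queue_head_t to the
ctl (call it poll_wait, initialized in xfs_pwork_init()) and a wake_up()
wherever nr_work drops to zero, the poll could look something like this
(again untested, just a sketch):

	void
	xfs_pwork_poll(
		struct xfs_pwork_ctl	*pctl)
	{
		/*
		 * Sleep for up to a second at a time; wait_event_timeout()
		 * returns early as soon as nr_work hits zero, so the longer
		 * timeout only costs us anything while workers are still
		 * running.
		 */
		while (wait_event_timeout(pctl->poll_wait,
				atomic_read(&pctl->nr_work) == 0, HZ) == 0)
			touch_softlockup_watchdog();
	}

	/* ...and on the completion side, wherever nr_work is decremented: */
	if (atomic_dec_and_test(&pctl->nr_work))
		wake_up(&pctl->poll_wait);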

Brian

> +
> +	return xfs_pwork_destroy(pctl);
> +}
> +
>  /*
>   * Return the amount of parallelism that the data device can handle, or 0 for
>   * no limit.
> diff --git a/fs/xfs/xfs_pwork.h b/fs/xfs/xfs_pwork.h
> index e0c1354a2d8c..08da723a8dc9 100644
> --- a/fs/xfs/xfs_pwork.h
> +++ b/fs/xfs/xfs_pwork.h
> @@ -18,6 +18,7 @@ struct xfs_pwork_ctl {
>  	struct workqueue_struct	*wq;
>  	struct xfs_mount	*mp;
>  	xfs_pwork_work_fn	work_fn;
> +	atomic_t		nr_work;
>  	int			error;
>  };
>  
> @@ -45,6 +46,7 @@ int xfs_pwork_init(struct xfs_mount *mp, struct xfs_pwork_ctl *pctl,
>  		unsigned int nr_threads);
>  void xfs_pwork_queue(struct xfs_pwork_ctl *pctl, struct xfs_pwork *pwork);
>  int xfs_pwork_destroy(struct xfs_pwork_ctl *pctl);
> +int xfs_pwork_destroy_poll(struct xfs_pwork_ctl *pctl);
>  unsigned int xfs_pwork_guess_datadev_parallelism(struct xfs_mount *mp);
>  
>  #endif /* __XFS_PWORK_H__ */
> diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
> index e4f3785f7a64..de6a623ada02 100644
> --- a/fs/xfs/xfs_qm.c
> +++ b/fs/xfs/xfs_qm.c
> @@ -1305,7 +1305,7 @@ xfs_qm_quotacheck(
>  		flags |= XFS_PQUOTA_CHKD;
>  	}
>  
> -	error = xfs_iwalk_threaded(mp, 0, xfs_qm_dqusage_adjust, 0, NULL);
> +	error = xfs_iwalk_threaded(mp, 0, xfs_qm_dqusage_adjust, 0, true, NULL);
>  	if (error)
>  		goto error_return;
>  
> 