Re: [PATCH 41/42] xfs: return a referenced perag from filestreams allocator

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Jan 19, 2023 at 09:45:04AM +1100, Dave Chinner wrote:
> From: Dave Chinner <dchinner@xxxxxxxxxx>
> 
> Now that the filestreams AG selection tracks active perags, we need
> to return an active perag to the core allocator code. This is
> because the file allocations the filestreams code will run are AG
> specific allocations and so need to pin the AG until the allocations
> complete.
> 
> We cannot rely on the filestreams item reference to do this - the
> filestreams association can be torn down at any time, hence we
> need to have a separate reference for the allocation process to pin
> the AG after it has been selected.
> 
> This means there is some perag juggling in allocation failure
> fallback paths as they will do all AG scans in the case the AG
> specific allocation fails. Hence we need to track the perag
> reference that the filestream allocator returned to make sure we
> don't leak it on repeated allocation failure.
> 
> Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
> ---
>  fs/xfs/libxfs/xfs_bmap.c | 38 +++++++++++-----
>  fs/xfs/xfs_filestream.c  | 93 ++++++++++++++++++++++++----------------
>  2 files changed, 84 insertions(+), 47 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
> index 098b46f3f3e3..7f56002b545d 100644
> --- a/fs/xfs/libxfs/xfs_bmap.c
> +++ b/fs/xfs/libxfs/xfs_bmap.c
> @@ -3427,6 +3427,7 @@ xfs_bmap_btalloc_at_eof(
>  	bool			ag_only)
>  {
>  	struct xfs_mount	*mp = args->mp;
> +	struct xfs_perag	*caller_pag = args->pag;
>  	int			error;
>  
>  	/*
> @@ -3454,9 +3455,11 @@ xfs_bmap_btalloc_at_eof(
>  		else
>  			args->minalignslop = 0;
>  
> -		args->pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, ap->blkno));
> +		if (!caller_pag)
> +			args->pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, ap->blkno));
>  		error = xfs_alloc_vextent_exact_bno(args, ap->blkno);
> -		xfs_perag_put(args->pag);
> +		if (!caller_pag)
> +			xfs_perag_put(args->pag);
>  		if (error)
>  			return error;
>  
> @@ -3482,10 +3485,13 @@ xfs_bmap_btalloc_at_eof(
>  		args->minalignslop = 0;
>  	}
>  
> -	if (ag_only)
> +	if (ag_only) {
>  		error = xfs_alloc_vextent_near_bno(args, ap->blkno);
> -	else
> +	} else {
> +		args->pag = NULL;
>  		error = xfs_alloc_vextent_start_ag(args, ap->blkno);
> +		args->pag = caller_pag;

At first glance I wondered if we end up leaking any args->pag set by the
_iterate_ags function, but I think it's the case that _finish will
release args->pag and set it back to NULL?  So in effect we're
preserving the caller's args->pag here, and nothing leaks.  In that
case, I think we should check that assumption:

		ASSERT(args->pag == NULL);
		args->pag = caller_pag;

If the answer to the above is yes, then with the above fixed,
Reviewed-by: Darrick J. Wong <djwong@xxxxxxxxxx>

--D

> +	}
>  	if (error)
>  		return error;
>  
> @@ -3544,12 +3550,13 @@ xfs_bmap_btalloc_filestreams(
>  	int			stripe_align)
>  {
>  	xfs_extlen_t		blen = 0;
> -	int			error;
> +	int			error = 0;
>  
>  
>  	error = xfs_filestream_select_ag(ap, args, &blen);
>  	if (error)
>  		return error;
> +	ASSERT(args->pag);
>  
>  	/*
>  	 * If we are in low space mode, then optimal allocation will fail so
> @@ -3558,22 +3565,31 @@ xfs_bmap_btalloc_filestreams(
>  	 */
>  	if (ap->tp->t_flags & XFS_TRANS_LOWMODE) {
>  		args->minlen = ap->minlen;
> +		ASSERT(args->fsbno == NULLFSBLOCK);
>  		goto out_low_space;
>  	}
>  
>  	args->minlen = xfs_bmap_select_minlen(ap, args, blen);
> -	if (ap->aeof) {
> +	if (ap->aeof)
>  		error = xfs_bmap_btalloc_at_eof(ap, args, blen, stripe_align,
>  				true);
> -		if (error || args->fsbno != NULLFSBLOCK)
> -			return error;
> -	}
>  
> -	error = xfs_alloc_vextent_near_bno(args, ap->blkno);
> +	if (!error && args->fsbno == NULLFSBLOCK)
> +		error = xfs_alloc_vextent_near_bno(args, ap->blkno);
> +
> +out_low_space:
> +	/*
> +	 * We are now done with the perag reference for the filestreams
> +	 * association provided by xfs_filestream_select_ag(). Release it now as
> +	 * we've either succeeded, had a fatal error or we are out of space and
> +	 * need to do a full filesystem scan for free space which will take its
> +	 * own references.
> +	 */
> +	xfs_perag_rele(args->pag);
> +	args->pag = NULL;
>  	if (error || args->fsbno != NULLFSBLOCK)
>  		return error;
>  
> -out_low_space:
>  	return xfs_bmap_btalloc_low_space(ap, args);
>  }
>  
> diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
> index 81aebe3e09ba..523a3b8b5754 100644
> --- a/fs/xfs/xfs_filestream.c
> +++ b/fs/xfs/xfs_filestream.c
> @@ -53,8 +53,9 @@ xfs_fstrm_free_func(
>   */
>  static int
>  xfs_filestream_pick_ag(
> +	struct xfs_alloc_arg	*args,
>  	struct xfs_inode	*ip,
> -	xfs_agnumber_t		*agp,
> +	xfs_agnumber_t		start_agno,
>  	int			flags,
>  	xfs_extlen_t		*longest)
>  {
> @@ -64,7 +65,6 @@ xfs_filestream_pick_ag(
>  	struct xfs_perag	*max_pag = NULL;
>  	xfs_extlen_t		minlen = *longest;
>  	xfs_extlen_t		free = 0, minfree, maxfree = 0;
> -	xfs_agnumber_t		start_agno = *agp;
>  	xfs_agnumber_t		agno;
>  	int			err, trylock;
>  
> @@ -73,8 +73,6 @@ xfs_filestream_pick_ag(
>  	/* 2% of an AG's blocks must be free for it to be chosen. */
>  	minfree = mp->m_sb.sb_agblocks / 50;
>  
> -	*agp = NULLAGNUMBER;
> -
>  	/* For the first pass, don't sleep trying to init the per-AG. */
>  	trylock = XFS_ALLOC_FLAG_TRYLOCK;
>  
> @@ -89,7 +87,7 @@ xfs_filestream_pick_ag(
>  				break;
>  			/* Couldn't lock the AGF, skip this AG. */
>  			err = 0;
> -			goto next_ag;
> +			continue;
>  		}
>  
>  		/* Keep track of the AG with the most free blocks. */
> @@ -146,16 +144,19 @@ xfs_filestream_pick_ag(
>  		/*
>  		 * No unassociated AGs are available, so select the AG with the
>  		 * most free space, regardless of whether it's already in use by
> -		 * another filestream. It none suit, return NULLAGNUMBER.
> +		 * another filestream. If none suit, just use whatever AG we can
> +		 * grab.
>  		 */
>  		if (!max_pag) {
> -			*agp = NULLAGNUMBER;
> -			trace_xfs_filestream_pick(ip, NULL, free);
> -			return 0;
> +			for_each_perag_wrap(mp, start_agno, agno, pag)
> +				break;
> +			atomic_inc(&pag->pagf_fstrms);
> +			*longest = 0;
> +		} else {
> +			pag = max_pag;
> +			free = maxfree;
> +			atomic_inc(&pag->pagf_fstrms);
>  		}
> -		pag = max_pag;
> -		free = maxfree;
> -		atomic_inc(&pag->pagf_fstrms);
>  	} else if (max_pag) {
>  		xfs_perag_rele(max_pag);
>  	}
> @@ -167,16 +168,29 @@ xfs_filestream_pick_ag(
>  	if (!item)
>  		goto out_put_ag;
>  
> +
> +	/*
> +	 * We are going to use this perag now, so take another ref to it for the
> +	 * allocation context returned to the caller. If we raced to create and
> +	 * insert the filestreams item into the MRU (-EEXIST), then we still
> +	 * keep this reference but free the item reference we gained above. On
> +	 * any other failure, we have to drop both.
> +	 */
> +	atomic_inc(&pag->pag_active_ref);
>  	item->pag = pag;
> +	args->pag = pag;
>  
>  	err = xfs_mru_cache_insert(mp->m_filestream, ip->i_ino, &item->mru);
>  	if (err) {
> -		if (err == -EEXIST)
> +		if (err == -EEXIST) {
>  			err = 0;
> +		} else {
> +			xfs_perag_rele(args->pag);
> +			args->pag = NULL;
> +		}
>  		goto out_free_item;
>  	}
>  
> -	*agp = pag->pag_agno;
>  	return 0;
>  
>  out_free_item:
> @@ -236,7 +250,14 @@ xfs_filestream_select_ag_mru(
>  	if (!mru)
>  		goto out_default_agno;
>  
> +	/*
> +	 * Grab the pag and take an extra active reference for the caller whilst
> +	 * the mru item cannot go away. This means we'll pin the perag with
> +	 * the reference we get here even if the filestreams association is torn
> +	 * down immediately after we mark the lookup as done.
> +	 */
>  	pag = container_of(mru, struct xfs_fstrm_item, mru)->pag;
> +	atomic_inc(&pag->pag_active_ref);
>  	xfs_mru_cache_done(mp->m_filestream);
>  
>  	trace_xfs_filestream_lookup(pag, ap->ip->i_ino);
> @@ -246,6 +267,8 @@ xfs_filestream_select_ag_mru(
>  
>  	error = xfs_bmap_longest_free_extent(pag, args->tp, blen);
>  	if (error) {
> +		/* We aren't going to use this perag */
> +		xfs_perag_rele(pag);
>  		if (error != -EAGAIN)
>  			return error;
>  		*blen = 0;
> @@ -253,12 +276,18 @@ xfs_filestream_select_ag_mru(
>  
>  	/*
>  	 * We are done if there's still enough contiguous free space to succeed.
> +	 * If there is very little free space before we start a filestreams
> +	 * allocation, we're almost guaranteed to fail to find a better AG with
> +	 * larger free space available so we don't even try.
>  	 */
>  	*agno = pag->pag_agno;
> -	if (*blen >= args->maxlen)
> +	if (*blen >= args->maxlen || (ap->tp->t_flags & XFS_TRANS_LOWMODE)) {
> +		args->pag = pag;
>  		return 0;
> +	}
>  
>  	/* Changing parent AG association now, so remove the existing one. */
> +	xfs_perag_rele(pag);
>  	mru = xfs_mru_cache_remove(mp->m_filestream, pip->i_ino);
>  	if (mru) {
>  		struct xfs_fstrm_item *item =
> @@ -297,46 +326,38 @@ xfs_filestream_select_ag(
>  	struct xfs_inode	*pip = NULL;
>  	xfs_agnumber_t		agno;
>  	int			flags = 0;
> -	int			error;
> +	int			error = 0;
>  
>  	args->total = ap->total;
>  	*blen = 0;
>  
>  	pip = xfs_filestream_get_parent(ap->ip);
>  	if (!pip) {
> -		agno = 0;
> -		goto out_select;
> +		ap->blkno = XFS_AGB_TO_FSB(mp, 0, 0);
> +		return 0;
>  	}
>  
>  	error = xfs_filestream_select_ag_mru(ap, args, pip, &agno, blen);
> -	if (error || *blen >= args->maxlen)
> +	if (error)
>  		goto out_rele;
> -
> -	ap->blkno = XFS_AGB_TO_FSB(args->mp, agno, 0);
> -	xfs_bmap_adjacent(ap);
> -
> -	/*
> -	 * If there is very little free space before we start a filestreams
> -	 * allocation, we're almost guaranteed to fail to find a better AG with
> -	 * larger free space available so we don't even try.
> -	 */
> +	if (*blen >= args->maxlen)
> +		goto out_select;
>  	if (ap->tp->t_flags & XFS_TRANS_LOWMODE)
>  		goto out_select;
>  
> +	ap->blkno = XFS_AGB_TO_FSB(args->mp, agno, 0);
> +	xfs_bmap_adjacent(ap);
> +	*blen = ap->length;
>  	if (ap->datatype & XFS_ALLOC_USERDATA)
>  		flags |= XFS_PICK_USERDATA;
>  	if (ap->tp->t_flags & XFS_TRANS_LOWMODE)
>  		flags |= XFS_PICK_LOWSPACE;
>  
> -	*blen = ap->length;
> -	error = xfs_filestream_pick_ag(pip, &agno, flags, blen);
> -	if (agno == NULLAGNUMBER) {
> -		agno = 0;
> -		*blen = 0;
> -	}
> -
> +	error = xfs_filestream_pick_ag(args, pip, agno, flags, blen);
> +	if (error)
> +		goto out_rele;
>  out_select:
> -	ap->blkno = XFS_AGB_TO_FSB(mp, agno, 0);
> +	ap->blkno = XFS_AGB_TO_FSB(mp, args->pag->pag_agno, 0);
>  out_rele:
>  	xfs_irele(pip);
>  	return error;
> -- 
> 2.39.0
> 



[Index of Archives]     [XFS Filesystem Development (older mail)]     [Linux Filesystem Development]     [Linux Audio Users]     [Yosemite Trails]     [Linux Kernel]     [Linux RAID]     [Linux SCSI]


  Powered by Linux