Re: [PATCH 42/42] xfs: refactor the filestreams allocator pick functions

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Jan 19, 2023 at 09:45:05AM +1100, Dave Chinner wrote:
> From: Dave Chinner <dchinner@xxxxxxxxxx>
> 
> Now that the filestreams allocator is largely rewritten,
> restructure the main entry point and pick function to separate out
> the different operations cleanly. The MRU lookup function should not
> handle the start AG selection on MRU lookup failure, and nor should
> the pick function handle building the association that is inserted
> into the MRU.
> 
> This leaves the filestreams allocator fairly clean and easy to
> understand, returning to the caller with an active perag reference
> and a target block to allocate at.
> 
> Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
> ---
>  fs/xfs/xfs_filestream.c | 247 +++++++++++++++++++++-------------------
>  fs/xfs/xfs_trace.h      |   9 +-
>  2 files changed, 132 insertions(+), 124 deletions(-)
> 
> diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
> index 523a3b8b5754..0a1d316ebdba 100644
> --- a/fs/xfs/xfs_filestream.c
> +++ b/fs/xfs/xfs_filestream.c
> @@ -48,19 +48,19 @@ xfs_fstrm_free_func(
>  }
>  
>  /*
> - * Scan the AGs starting at startag looking for an AG that isn't in use and has
> - * at least minlen blocks free.
> + * Scan the AGs starting at start_agno looking for an AG that isn't in use and
> + * has at least minlen blocks free. If no AG is found to match the allocation
> + * requirements, pick the AG with the most free space in it.
>   */
>  static int
>  xfs_filestream_pick_ag(
>  	struct xfs_alloc_arg	*args,
> -	struct xfs_inode	*ip,
> +	xfs_ino_t		pino,
>  	xfs_agnumber_t		start_agno,
>  	int			flags,
>  	xfs_extlen_t		*longest)
>  {
> -	struct xfs_mount	*mp = ip->i_mount;
> -	struct xfs_fstrm_item	*item;
> +	struct xfs_mount	*mp = args->mp;
>  	struct xfs_perag	*pag;
>  	struct xfs_perag	*max_pag = NULL;
>  	xfs_extlen_t		minlen = *longest;
> @@ -68,8 +68,6 @@ xfs_filestream_pick_ag(
>  	xfs_agnumber_t		agno;
>  	int			err, trylock;

Who consumes trylock?  Is this supposed to get passed through to
xfs_bmap_longest_free_extent, or is the goal here merely to run the
for_each_perag_wrap loop twice before going for the most free or any old
perag?

--D

> -	ASSERT(S_ISDIR(VFS_I(ip)->i_mode));
> -
>  	/* 2% of an AG's blocks must be free for it to be chosen. */
>  	minfree = mp->m_sb.sb_agblocks / 50;
>  
> @@ -78,7 +76,7 @@ xfs_filestream_pick_ag(
>  
>  restart:
>  	for_each_perag_wrap(mp, start_agno, agno, pag) {
> -		trace_xfs_filestream_scan(pag, ip->i_ino);
> +		trace_xfs_filestream_scan(pag, pino);
>  		*longest = 0;
>  		err = xfs_bmap_longest_free_extent(pag, NULL, longest);
>  		if (err) {
> @@ -148,9 +146,9 @@ xfs_filestream_pick_ag(
>  		 * grab.
>  		 */
>  		if (!max_pag) {
> -			for_each_perag_wrap(mp, start_agno, agno, pag)
> +			for_each_perag_wrap(args->mp, 0, start_agno, args->pag)
>  				break;
> -			atomic_inc(&pag->pagf_fstrms);
> +			atomic_inc(&args->pag->pagf_fstrms);
>  			*longest = 0;
>  		} else {
>  			pag = max_pag;
> @@ -161,44 +159,10 @@ xfs_filestream_pick_ag(
>  		xfs_perag_rele(max_pag);
>  	}
>  
> -	trace_xfs_filestream_pick(ip, pag, free);
> -
> -	err = -ENOMEM;
> -	item = kmem_alloc(sizeof(*item), KM_MAYFAIL);
> -	if (!item)
> -		goto out_put_ag;
> -
> -
> -	/*
> -	 * We are going to use this perag now, so take another ref to it for the
> -	 * allocation context returned to the caller. If we raced to create and
> -	 * insert the filestreams item into the MRU (-EEXIST), then we still
> -	 * keep this reference but free the item reference we gained above. On
> -	 * any other failure, we have to drop both.
> -	 */
> -	atomic_inc(&pag->pag_active_ref);
> -	item->pag = pag;
> +	trace_xfs_filestream_pick(pag, pino, free);
>  	args->pag = pag;
> -
> -	err = xfs_mru_cache_insert(mp->m_filestream, ip->i_ino, &item->mru);
> -	if (err) {
> -		if (err == -EEXIST) {
> -			err = 0;
> -		} else {
> -			xfs_perag_rele(args->pag);
> -			args->pag = NULL;
> -		}
> -		goto out_free_item;
> -	}
> -
>  	return 0;
>  
> -out_free_item:
> -	kmem_free(item);
> -out_put_ag:
> -	atomic_dec(&pag->pagf_fstrms);
> -	xfs_perag_rele(pag);
> -	return err;
>  }
>  
>  static struct xfs_inode *
> @@ -227,29 +191,29 @@ xfs_filestream_get_parent(
>  
>  /*
>   * Lookup the mru cache for an existing association. If one exists and we can
> - * use it, return with the agno and blen indicating that the allocation will
> - * proceed with that association.
> + * use it, return with an active perag reference indicating that the allocation
> + * will proceed with that association.
>   *
>   * If we have no association, or we cannot use the current one and have to
> - * destroy it, return with blen = 0 and agno pointing at the next agno to try.
> + * destroy it, return with longest = 0 to tell the caller to create a new
> + * association.
>   */
> -int
> -xfs_filestream_select_ag_mru(
> +static int
> +xfs_filestream_lookup_association(
>  	struct xfs_bmalloca	*ap,
>  	struct xfs_alloc_arg	*args,
> -	struct xfs_inode	*pip,
> -	xfs_agnumber_t		*agno,
> -	xfs_extlen_t		*blen)
> +	xfs_ino_t		pino,
> +	xfs_extlen_t		*longest)
>  {
> -	struct xfs_mount	*mp = ap->ip->i_mount;
> +	struct xfs_mount	*mp = args->mp;
>  	struct xfs_perag	*pag;
>  	struct xfs_mru_cache_elem *mru;
> -	int			error;
> +	int			error = 0;
>  
> -	mru = xfs_mru_cache_lookup(mp->m_filestream, pip->i_ino);
> +	*longest = 0;
> +	mru = xfs_mru_cache_lookup(mp->m_filestream, pino);
>  	if (!mru)
> -		goto out_default_agno;
> -
> +		return 0;
>  	/*
>  	 * Grab the pag and take an extra active reference for the caller whilst
>  	 * the mru item cannot go away. This means we'll pin the perag with
> @@ -265,103 +229,148 @@ xfs_filestream_select_ag_mru(
>  	ap->blkno = XFS_AGB_TO_FSB(args->mp, pag->pag_agno, 0);
>  	xfs_bmap_adjacent(ap);
>  
> -	error = xfs_bmap_longest_free_extent(pag, args->tp, blen);
> -	if (error) {
> -		/* We aren't going to use this perag */
> -		xfs_perag_rele(pag);
> -		if (error != -EAGAIN)
> -			return error;
> -		*blen = 0;
> -	}
> -
>  	/*
> -	 * We are done if there's still enough contiguous free space to succeed.
>  	 * If there is very little free space before we start a filestreams
> -	 * allocation, we're almost guaranteed to fail to find a better AG with
> -	 * larger free space available so we don't even try.
> +	 * allocation, we're almost guaranteed to fail to find a large enough
> +	 * free space available so just use the cached AG.
>  	 */
> -	*agno = pag->pag_agno;
> -	if (*blen >= args->maxlen || (ap->tp->t_flags & XFS_TRANS_LOWMODE)) {
> -		args->pag = pag;
> -		return 0;
> +	if (ap->tp->t_flags & XFS_TRANS_LOWMODE) {
> +		*longest = 1;
> +		goto out_done;
>  	}
>  
> +	error = xfs_bmap_longest_free_extent(pag, args->tp, longest);
> +	if (error == -EAGAIN)
> +		error = 0;
> +	if (error || *longest < args->maxlen) {
> +		/* We aren't going to use this perag */
> +		*longest = 0;
> +		xfs_perag_rele(pag);
> +		return error;
> +	}
> +
> +out_done:
> +	args->pag = pag;
> +	return 0;
> +}
> +
> +static int
> +xfs_filestream_create_association(
> +	struct xfs_bmalloca	*ap,
> +	struct xfs_alloc_arg	*args,
> +	xfs_ino_t		pino,
> +	xfs_extlen_t		*longest)
> +{
> +	struct xfs_mount	*mp = args->mp;
> +	struct xfs_mru_cache_elem *mru;
> +	struct xfs_fstrm_item	*item;
> +	xfs_agnumber_t		agno = XFS_INO_TO_AGNO(mp, pino);
> +	int			flags = 0;
> +	int			error;
> +
>  	/* Changing parent AG association now, so remove the existing one. */
> -	xfs_perag_rele(pag);
> -	mru = xfs_mru_cache_remove(mp->m_filestream, pip->i_ino);
> +	mru = xfs_mru_cache_remove(mp->m_filestream, pino);
>  	if (mru) {
>  		struct xfs_fstrm_item *item =
>  			container_of(mru, struct xfs_fstrm_item, mru);
> -		*agno = (item->pag->pag_agno + 1) % mp->m_sb.sb_agcount;
> -		xfs_fstrm_free_func(mp, mru);
> -		return 0;
> -	}
>  
> -out_default_agno:
> -	if (xfs_is_inode32(mp)) {
> +		agno = (item->pag->pag_agno + 1) % mp->m_sb.sb_agcount;
> +		xfs_fstrm_free_func(mp, mru);
> +	} else if (xfs_is_inode32(mp)) {
>  		xfs_agnumber_t	 rotorstep = xfs_rotorstep;
> -		*agno = (mp->m_agfrotor / rotorstep) %
> -				mp->m_sb.sb_agcount;
> +
> +		agno = (mp->m_agfrotor / rotorstep) % mp->m_sb.sb_agcount;
>  		mp->m_agfrotor = (mp->m_agfrotor + 1) %
>  				 (mp->m_sb.sb_agcount * rotorstep);
> -		return 0;
>  	}
> -	*agno = XFS_INO_TO_AGNO(mp, pip->i_ino);
> +
> +	ap->blkno = XFS_AGB_TO_FSB(args->mp, agno, 0);
> +	xfs_bmap_adjacent(ap);
> +
> +	if (ap->datatype & XFS_ALLOC_USERDATA)
> +		flags |= XFS_PICK_USERDATA;
> +	if (ap->tp->t_flags & XFS_TRANS_LOWMODE)
> +		flags |= XFS_PICK_LOWSPACE;
> +
> +	*longest = ap->length;
> +	error = xfs_filestream_pick_ag(args, pino, agno, flags, longest);
> +	if (error)
> +		return error;
> +
> +	/*
> +	 * We are going to use this perag now, so create an association for it.
> +	 * xfs_filestream_pick_ag() has already bumped the perag fstrms counter
> +	 * for us, so all we need to do here is take another active reference to
> +	 * the perag for the cached association.
> +	 *
> +	 * If we fail to store the association, we need to drop the fstrms
> +	 * counter as well as drop the perag reference we take here for the
> +	 * item. We do not need to return an error for this failure - as long as
> +	 * we return a referenced AG, the allocation can still go ahead just
> +	 * fine.
> +	 */
> +	item = kmem_alloc(sizeof(*item), KM_MAYFAIL);
> +	if (!item)
> +		goto out_put_fstrms;
> +
> +	atomic_inc(&args->pag->pag_active_ref);
> +	item->pag = args->pag;
> +	error = xfs_mru_cache_insert(mp->m_filestream, pino, &item->mru);
> +	if (error)
> +		goto out_free_item;
>  	return 0;
>  
> +out_free_item:
> +	xfs_perag_rele(item->pag);
> +	kmem_free(item);
> +out_put_fstrms:
> +	atomic_dec(&args->pag->pagf_fstrms);
> +	return 0;
>  }
>  
>  /*
>   * Search for an allocation group with a single extent large enough for
> - * the request.  If one isn't found, then adjust the minimum allocation
> - * size to the largest space found.
> + * the request. First we look for an existing association and use that if it
> + * is found. Otherwise, we create a new association by selecting an AG that fits
> + * the allocation criteria.
> + *
> + * We return with a referenced perag in args->pag to indicate which AG we are
> + * allocating into or an error with no references held.
>   */
>  int
>  xfs_filestream_select_ag(
>  	struct xfs_bmalloca	*ap,
>  	struct xfs_alloc_arg	*args,
> -	xfs_extlen_t		*blen)
> +	xfs_extlen_t		*longest)
>  {
> -	struct xfs_mount	*mp = ap->ip->i_mount;
> -	struct xfs_inode	*pip = NULL;
> -	xfs_agnumber_t		agno;
> -	int			flags = 0;
> +	struct xfs_mount	*mp = args->mp;
> +	struct xfs_inode	*pip;
> +	xfs_ino_t		ino = 0;
>  	int			error = 0;
>  
> +	*longest = 0;
>  	args->total = ap->total;
> -	*blen = 0;
> -
>  	pip = xfs_filestream_get_parent(ap->ip);
> -	if (!pip) {
> -		ap->blkno = XFS_AGB_TO_FSB(mp, 0, 0);
> -		return 0;
> +	if (pip) {
> +		ino = pip->i_ino;
> +		error = xfs_filestream_lookup_association(ap, args, ino,
> +				longest);
> +		xfs_irele(pip);
> +		if (error)
> +			return error;
> +		if (*longest >= args->maxlen)
> +			goto out_select;
> +		if (ap->tp->t_flags & XFS_TRANS_LOWMODE)
> +			goto out_select;
>  	}
>  
> -	error = xfs_filestream_select_ag_mru(ap, args, pip, &agno, blen);
> +	error = xfs_filestream_create_association(ap, args, ino, longest);
>  	if (error)
> -		goto out_rele;
> -	if (*blen >= args->maxlen)
> -		goto out_select;
> -	if (ap->tp->t_flags & XFS_TRANS_LOWMODE)
> -		goto out_select;
> -
> -	ap->blkno = XFS_AGB_TO_FSB(args->mp, agno, 0);
> -	xfs_bmap_adjacent(ap);
> -	*blen = ap->length;
> -	if (ap->datatype & XFS_ALLOC_USERDATA)
> -		flags |= XFS_PICK_USERDATA;
> -	if (ap->tp->t_flags & XFS_TRANS_LOWMODE)
> -		flags |= XFS_PICK_LOWSPACE;
> +		return error;
>  
> -	error = xfs_filestream_pick_ag(args, pip, agno, flags, blen);
> -	if (error)
> -		goto out_rele;
>  out_select:
>  	ap->blkno = XFS_AGB_TO_FSB(mp, args->pag->pag_agno, 0);
> -out_rele:
> -	xfs_irele(pip);
> -	return error;
> -
> +	return 0;
>  }
>  
>  void
> diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
> index b5f7d225d5b4..1d3569c0d2fe 100644
> --- a/fs/xfs/xfs_trace.h
> +++ b/fs/xfs/xfs_trace.h
> @@ -668,9 +668,8 @@ DEFINE_FILESTREAM_EVENT(xfs_filestream_lookup);
>  DEFINE_FILESTREAM_EVENT(xfs_filestream_scan);
>  
>  TRACE_EVENT(xfs_filestream_pick,
> -	TP_PROTO(struct xfs_inode *ip, struct xfs_perag *pag,
> -		 xfs_extlen_t free),
> -	TP_ARGS(ip, pag, free),
> +	TP_PROTO(struct xfs_perag *pag, xfs_ino_t ino, xfs_extlen_t free),
> +	TP_ARGS(pag, ino, free),
>  	TP_STRUCT__entry(
>  		__field(dev_t, dev)
>  		__field(xfs_ino_t, ino)
> @@ -679,8 +678,8 @@ TRACE_EVENT(xfs_filestream_pick,
>  		__field(xfs_extlen_t, free)
>  	),
>  	TP_fast_assign(
> -		__entry->dev = VFS_I(ip)->i_sb->s_dev;
> -		__entry->ino = ip->i_ino;
> +		__entry->dev = pag->pag_mount->m_super->s_dev;
> +		__entry->ino = ino;
>  		if (pag) {
>  			__entry->agno = pag->pag_agno;
>  			__entry->streams = atomic_read(&pag->pagf_fstrms);
> -- 
> 2.39.0
> 



[Index of Archives]     [XFS Filesystem Development (older mail)]     [Linux Filesystem Development]     [Linux Audio Users]     [Yosemite Trails]     [Linux Kernel]     [Linux RAID]     [Linux SCSI]


  Powered by Linux