Re: [PATCH 21/26] xfs: make the RT allocator rtgroup aware

Dave Chinner <david@xxxxxxxxxxxxx> · Mon, 26 Aug 2024 14:56:37 +1000

On Thu, Aug 22, 2024 at 05:26:38PM -0700, Darrick J. Wong wrote:
> From: Christoph Hellwig <hch@xxxxxx>
> 
> Make the allocator rtgroup aware by either picking a specific group if
> there is a hint, or loop over all groups otherwise.  A simple rotor is
> provided to pick the placement for initial allocations.
> 
> Signed-off-by: Christoph Hellwig <hch@xxxxxx>
> Reviewed-by: Darrick J. Wong <djwong@xxxxxxxxxx>
> Signed-off-by: Darrick J. Wong <djwong@xxxxxxxxxx>
> ---
>  fs/xfs/libxfs/xfs_bmap.c     |   13 +++++-
>  fs/xfs/libxfs/xfs_rtbitmap.c |    6 ++-
>  fs/xfs/xfs_mount.h           |    1 
>  fs/xfs/xfs_rtalloc.c         |   98 ++++++++++++++++++++++++++++++++++++++----
>  4 files changed, 105 insertions(+), 13 deletions(-)
> 
> 
> diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
> index 126a0d253654a..88c62e1158ac7 100644
> --- a/fs/xfs/libxfs/xfs_bmap.c
> +++ b/fs/xfs/libxfs/xfs_bmap.c
> @@ -3151,8 +3151,17 @@ xfs_bmap_adjacent_valid(
>  	struct xfs_mount	*mp = ap->ip->i_mount;
>  
>  	if (XFS_IS_REALTIME_INODE(ap->ip) &&
> -	    (ap->datatype & XFS_ALLOC_USERDATA))
> -		return x < mp->m_sb.sb_rblocks;
> +	    (ap->datatype & XFS_ALLOC_USERDATA)) {
> +		if (x >= mp->m_sb.sb_rblocks)
> +			return false;
> +		if (!xfs_has_rtgroups(mp))
> +			return true;
> +
> +		return xfs_rtb_to_rgno(mp, x) == xfs_rtb_to_rgno(mp, y) &&
> +			xfs_rtb_to_rgno(mp, x) < mp->m_sb.sb_rgcount &&
> +			xfs_rtb_to_rtx(mp, x) < mp->m_sb.sb_rgextents;

WHy do we need the xfs_has_rtgroups() check here? The new rtg logic will
return true for an old school rt device here, right?

> diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
> index 3fedc552b51b0..2b57ff2687bf6 100644
> --- a/fs/xfs/xfs_rtalloc.c
> +++ b/fs/xfs/xfs_rtalloc.c
> @@ -1661,8 +1661,9 @@ xfs_rtalloc_align_minmax(
>  }
>  
>  static int
> -xfs_rtallocate(
> +xfs_rtallocate_rtg(
>  	struct xfs_trans	*tp,
> +	xfs_rgnumber_t		rgno,
>  	xfs_rtblock_t		bno_hint,
>  	xfs_rtxlen_t		minlen,
>  	xfs_rtxlen_t		maxlen,
> @@ -1682,16 +1683,33 @@ xfs_rtallocate(
>  	xfs_rtxlen_t		len = 0;
>  	int			error = 0;
>  
> -	args.rtg = xfs_rtgroup_grab(args.mp, 0);
> +	args.rtg = xfs_rtgroup_grab(args.mp, rgno);
>  	if (!args.rtg)
>  		return -ENOSPC;
>  
>  	/*
> -	 * Lock out modifications to both the RT bitmap and summary inodes.
> +	 * We need to lock out modifications to both the RT bitmap and summary
> +	 * inodes for finding free space in xfs_rtallocate_extent_{near,size}
> +	 * and join the bitmap and summary inodes for the actual allocation
> +	 * down in xfs_rtallocate_range.
> +	 *
> +	 * For RTG-enabled file system we don't want to join the inodes to the
> +	 * transaction until we are committed to allocate to allocate from this
> +	 * RTG so that only one inode of each type is locked at a time.
> +	 *
> +	 * But for pre-RTG file systems we need to already to join the bitmap
> +	 * inode to the transaction for xfs_rtpick_extent, which bumps the
> +	 * sequence number in it, so we'll have to join the inode to the
> +	 * transaction early here.
> +	 *
> +	 * This is all a bit messy, but at least the mess is contained in
> +	 * this function.
>  	 */
>  	if (!*rtlocked) {
>  		xfs_rtgroup_lock(args.rtg, XFS_RTGLOCK_BITMAP);
> -		xfs_rtgroup_trans_join(tp, args.rtg, XFS_RTGLOCK_BITMAP);
> +		if (!xfs_has_rtgroups(args.mp))
> +			xfs_rtgroup_trans_join(tp, args.rtg,
> +					XFS_RTGLOCK_BITMAP);
>  		*rtlocked = true;
>  	}
>  
> @@ -1701,7 +1719,7 @@ xfs_rtallocate(
>  	 */
>  	if (bno_hint)
>  		start = xfs_rtb_to_rtx(args.mp, bno_hint);
> -	else if (initial_user_data)
> +	else if (!xfs_has_rtgroups(args.mp) && initial_user_data)
>  		start = xfs_rtpick_extent(args.rtg, tp, maxlen);

Check initial_user_data first - we don't care if there are rtgroups
enabled if initial_user_data is not true, and we only ever allocate
initial data on an inode once...

> @@ -1741,6 +1767,53 @@ xfs_rtallocate(
>  	return error;
>  }
>  
> +static int
> +xfs_rtallocate_rtgs(
> +	struct xfs_trans	*tp,
> +	xfs_fsblock_t		bno_hint,
> +	xfs_rtxlen_t		minlen,
> +	xfs_rtxlen_t		maxlen,
> +	xfs_rtxlen_t		prod,
> +	bool			wasdel,
> +	bool			initial_user_data,
> +	xfs_rtblock_t		*bno,
> +	xfs_extlen_t		*blen)
> +{
> +	struct xfs_mount	*mp = tp->t_mountp;
> +	xfs_rgnumber_t		start_rgno, rgno;
> +	int			error;
> +
> +	/*
> +	 * For now this just blindly iterates over the RTGs for an initial
> +	 * allocation.  We could try to keep an in-memory rtg_longest member
> +	 * to avoid the locking when just looking for big enough free space,
> +	 * but for now this keep things simple.
> +	 */
> +	if (bno_hint != NULLFSBLOCK)
> +		start_rgno = xfs_rtb_to_rgno(mp, bno_hint);
> +	else
> +		start_rgno = (atomic_inc_return(&mp->m_rtgrotor) - 1) %
> +				mp->m_sb.sb_rgcount;
> +
> +	rgno = start_rgno;
> +	do {
> +		bool		rtlocked = false;
> +
> +		error = xfs_rtallocate_rtg(tp, rgno, bno_hint, minlen, maxlen,
> +				prod, wasdel, initial_user_data, &rtlocked,
> +				bno, blen);
> +		if (error != -ENOSPC)
> +			return error;
> +		ASSERT(!rtlocked);
> +
> +		if (++rgno == mp->m_sb.sb_rgcount)
> +			rgno = 0;
> +		bno_hint = NULLFSBLOCK;
> +	} while (rgno != start_rgno);
> +
> +	return -ENOSPC;
> +}
> +
>  static int
>  xfs_rtallocate_align(
>  	struct xfs_bmalloca	*ap,
> @@ -1835,9 +1908,16 @@ xfs_bmap_rtalloc(
>  	if (xfs_bmap_adjacent(ap))
>  		bno_hint = ap->blkno;
>  
> -	error = xfs_rtallocate(ap->tp, bno_hint, raminlen, ralen, prod,
> -			ap->wasdel, initial_user_data, &rtlocked,
> -			&ap->blkno, &ap->length);
> +	if (xfs_has_rtgroups(ap->ip->i_mount)) {
> +		error = xfs_rtallocate_rtgs(ap->tp, bno_hint, raminlen, ralen,
> +				prod, ap->wasdel, initial_user_data,
> +				&ap->blkno, &ap->length);
> +	} else {
> +		error = xfs_rtallocate_rtg(ap->tp, 0, bno_hint, raminlen, ralen,
> +				prod, ap->wasdel, initial_user_data,
> +				&rtlocked, &ap->blkno, &ap->length);
> +	}

The xfs_has_rtgroups() check is unnecessary.  The iterator in
xfs_rtallocate_rtgs() will do the right thing for the
!xfs_has_rtgroups() case - it'll set start_rgno = 0 and break out
after a single call to xfs_rtallocate_rtg() with rgno = 0.

Another thing that probably should be done here is push all the
constant value calculations a couple of functions down the stack to
where they are used. Then we only need to pass two parameters down
through the rg iterator here, not 11...

-Dave.
-- 
Dave Chinner
david@xxxxxxxxxxxxx