Re: [PATCH 4/6] xfs: reduce the absurdly large log reservations

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Apr 14, 2022 at 03:54:48PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@xxxxxxxxxx>
> 
> Back in the early days of reflink and rmap development I set the
> transaction reservation sizes to be overly generous for rmap+reflink
> filesystems, and a little under-generous for rmap-only filesystems.
> 
> Since we don't need *eight* transaction rolls to handle three new log
> intent items, decrease the logcounts to what we actually need, and amend
> the shadow reservation computation function to reflect what we used to
> do so that the minimum log size doesn't change.

Yup, this will make a huge difference to the number of transactions
we can have in flight on reflink/rmap enabled filesystems.

Mostly looks good, some comments about code and comments below.

> Signed-off-by: Darrick J. Wong <djwong@xxxxxxxxxx>
> ---
>  fs/xfs/libxfs/xfs_trans_resv.c |   88 +++++++++++++++++++++++++++-------------
>  fs/xfs/libxfs/xfs_trans_resv.h |    6 ++-
>  2 files changed, 64 insertions(+), 30 deletions(-)
> 
> 
> diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
> index 12d4e451e70e..8d2f04dddb65 100644
> --- a/fs/xfs/libxfs/xfs_trans_resv.c
> +++ b/fs/xfs/libxfs/xfs_trans_resv.c
> @@ -814,36 +814,16 @@ xfs_trans_resv_calc(
>  	struct xfs_mount	*mp,
>  	struct xfs_trans_resv	*resp)
>  {
> -	unsigned int		rmap_maxlevels = mp->m_rmap_maxlevels;
> -
> -	/*
> -	 * In the early days of rmap+reflink, we always set the rmap maxlevels
> -	 * to 9 even if the AG was small enough that it would never grow to
> -	 * that height.  Transaction reservation sizes influence the minimum
> -	 * log size calculation, which influences the size of the log that mkfs
> -	 * creates.  Use the old value here to ensure that newly formatted
> -	 * small filesystems will mount on older kernels.
> -	 */
> -	if (xfs_has_rmapbt(mp) && xfs_has_reflink(mp))
> -		mp->m_rmap_maxlevels = XFS_OLD_REFLINK_RMAP_MAXLEVELS;
> -
>  	/*
>  	 * The following transactions are logged in physical format and
>  	 * require a permanent reservation on space.
>  	 */
>  	resp->tr_write.tr_logres = xfs_calc_write_reservation(mp);
> -	if (xfs_has_reflink(mp))
> -		resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT_REFLINK;
> -	else
> -		resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT;
> +	resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT;
>  	resp->tr_write.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
>  
>  	resp->tr_itruncate.tr_logres = xfs_calc_itruncate_reservation(mp);
> -	if (xfs_has_reflink(mp))
> -		resp->tr_itruncate.tr_logcount =
> -				XFS_ITRUNCATE_LOG_COUNT_REFLINK;
> -	else
> -		resp->tr_itruncate.tr_logcount = XFS_ITRUNCATE_LOG_COUNT;
> +	resp->tr_itruncate.tr_logcount = XFS_ITRUNCATE_LOG_COUNT;
>  	resp->tr_itruncate.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
>  
>  	resp->tr_rename.tr_logres = xfs_calc_rename_reservation(mp);
> @@ -900,10 +880,7 @@ xfs_trans_resv_calc(
>  	resp->tr_growrtalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
>  
>  	resp->tr_qm_dqalloc.tr_logres = xfs_calc_qm_dqalloc_reservation(mp);
> -	if (xfs_has_reflink(mp))
> -		resp->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT_REFLINK;
> -	else
> -		resp->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT;
> +	resp->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT;
>  	resp->tr_qm_dqalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
>  
>  	/*
> @@ -930,8 +907,26 @@ xfs_trans_resv_calc(
>  	resp->tr_growrtzero.tr_logres = xfs_calc_growrtzero_reservation(mp);
>  	resp->tr_growrtfree.tr_logres = xfs_calc_growrtfree_reservation(mp);
>  
> -	/* Put everything back the way it was.  This goes at the end. */
> -	mp->m_rmap_maxlevels = rmap_maxlevels;
> +	/* Add one logcount for BUI items that appear with rmap or reflink. */
> +	if (xfs_has_reflink(mp) || xfs_has_rmapbt(mp)) {
> +		resp->tr_itruncate.tr_logcount++;
> +		resp->tr_write.tr_logcount++;
> +		resp->tr_qm_dqalloc.tr_logcount++;
> +	}
> +
> +	/* Add one logcount for refcount intent items. */
> +	if (xfs_has_reflink(mp)) {
> +		resp->tr_itruncate.tr_logcount++;
> +		resp->tr_write.tr_logcount++;
> +		resp->tr_qm_dqalloc.tr_logcount++;
> +	}
> +
> +	/* Add one logcount for rmap intent items. */
> +	if (xfs_has_rmapbt(mp)) {
> +		resp->tr_itruncate.tr_logcount++;
> +		resp->tr_write.tr_logcount++;
> +		resp->tr_qm_dqalloc.tr_logcount++;
> +	}

This would be much more concisely written as

	count = 0;
	if (xfs_has_reflink(mp) || xfs_has_rmapbt(mp)) {
		count = 2;
		if (xfs_has_reflink(mp) && xfs_has_rmapbt(mp))
			count++;
	}

	resp->tr_itruncate.tr_logcount += count;
	resp->tr_write.tr_logcount += count;
	resp->tr_qm_dqalloc.tr_logcount += count;

>  }
>  
>  /*
> @@ -943,5 +938,42 @@ xfs_trans_resv_calc_logsize(
>  	struct xfs_mount	*mp,
>  	struct xfs_trans_resv	*resp)
>  {
> +	unsigned int		rmap_maxlevels = mp->m_rmap_maxlevels;
> +
> +	ASSERT(resp != M_RES(mp));
> +
> +	/*
> +	 * In the early days of rmap+reflink, we always set the rmap maxlevels
> +	 * to 9 even if the AG was small enough that it would never grow to
> +	 * that height.  Transaction reservation sizes influence the minimum
> +	 * log size calculation, which influences the size of the log that mkfs
> +	 * creates.  Use the old value here to ensure that newly formatted
> +	 * small filesystems will mount on older kernels.
> +	 */
> +	if (xfs_has_rmapbt(mp) && xfs_has_reflink(mp))
> +		mp->m_rmap_maxlevels = XFS_OLD_REFLINK_RMAP_MAXLEVELS;
> +
>  	xfs_trans_resv_calc(mp, resp);
> +
> +	if (xfs_has_reflink(mp)) {
> +		/*
> +		 * In the early days of reflink we set the logcounts absurdly
> +		 * high.

"In the early days of reflink, typical operation log counts were
greatly overestimated"

> +		 */
> +		resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT_REFLINK;
> +		resp->tr_itruncate.tr_logcount =
> +				XFS_ITRUNCATE_LOG_COUNT_REFLINK;
> +		resp->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT_REFLINK;
> +	} else if (xfs_has_rmapbt(mp)) {
> +		/*
> +		 * In the early days of non-reflink rmap we set the logcount
> +		 * too low.

"In the early days of non-reflink rmap the impact of rmap btree
updates on log counts was not taken into account at all."

> +		 */
> +		resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT;
> +		resp->tr_itruncate.tr_logcount = XFS_ITRUNCATE_LOG_COUNT;
> +		resp->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT;
> +	}
> +
> +	/* Put everything back the way it was.  This goes at the end. */
> +	mp->m_rmap_maxlevels = rmap_maxlevels;
>  }

Yeah, so I think this should all go in xfs_log_rlimit.c as it is
specific to the minimum log size calculation.

> diff --git a/fs/xfs/libxfs/xfs_trans_resv.h b/fs/xfs/libxfs/xfs_trans_resv.h
> index 9fa4863f72a4..461859f4a745 100644
> --- a/fs/xfs/libxfs/xfs_trans_resv.h
> +++ b/fs/xfs/libxfs/xfs_trans_resv.h
> @@ -73,7 +73,6 @@ struct xfs_trans_resv {
>  #define	XFS_DEFAULT_LOG_COUNT		1
>  #define	XFS_DEFAULT_PERM_LOG_COUNT	2
>  #define	XFS_ITRUNCATE_LOG_COUNT		2
> -#define	XFS_ITRUNCATE_LOG_COUNT_REFLINK	8
>  #define XFS_INACTIVE_LOG_COUNT		2
>  #define	XFS_CREATE_LOG_COUNT		2
>  #define	XFS_CREATE_TMPFILE_LOG_COUNT	2
> @@ -83,12 +82,15 @@ struct xfs_trans_resv {
>  #define	XFS_LINK_LOG_COUNT		2
>  #define	XFS_RENAME_LOG_COUNT		2
>  #define	XFS_WRITE_LOG_COUNT		2
> -#define	XFS_WRITE_LOG_COUNT_REFLINK	8
>  #define	XFS_ADDAFORK_LOG_COUNT		2
>  #define	XFS_ATTRINVAL_LOG_COUNT		1
>  #define	XFS_ATTRSET_LOG_COUNT		3
>  #define	XFS_ATTRRM_LOG_COUNT		3
>  
> +/* Absurdly high log counts from the early days of reflink.  Do not use. */
> +#define	XFS_ITRUNCATE_LOG_COUNT_REFLINK	8
> +#define	XFS_WRITE_LOG_COUNT_REFLINK	8

/*
 * Original log counts were overestimated in the early days of
 * reflink. These are retained here purely for minimum log size
 * calculations and are not to be used for runtime reservations.
 */

Cheers,

Dave.
-- 
Dave Chinner
david@xxxxxxxxxxxxx



[Index of Archives]     [XFS Filesystem Development (older mail)]     [Linux Filesystem Development]     [Linux Audio Users]     [Yosemite Trails]     [Linux Kernel]     [Linux RAID]     [Linux SCSI]


  Powered by Linux