Re: [PATCH] xfs: fix log reservation overflows when allocating large rt extents

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Dec 04, 2019 at 08:38:09AM -0800, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
> 
> Omar Sandoval reported that a 4G fallocate on the realtime device causes
> filesystem shutdowns due to a log reservation overflow that happens when
> we log the rtbitmap updates.  Factor rtbitmap/rtsummary updates into the
> tr_write and tr_itruncate log reservation calculations.
> 
> "The following reproducer results in a transaction log overrun warning
> for me:
> 
>     mkfs.xfs -f -r rtdev=/dev/vdc -d rtinherit=1 -m reflink=0 /dev/vdb
>     mount -o rtdev=/dev/vdc /dev/vdb /mnt
>     fallocate -l 4G /mnt/foo
> 
> Reported-by: Omar Sandoval <osandov@xxxxxxxxxxx>
> Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
> ---

Looks reasonable enough, given my limited knowledge of the rt bits. One
question:

>  fs/xfs/libxfs/xfs_trans_resv.c |   96 ++++++++++++++++++++++++++++++++--------
>  1 file changed, 77 insertions(+), 19 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
> index c55cd9a3dec9..824073a839ac 100644
> --- a/fs/xfs/libxfs/xfs_trans_resv.c
> +++ b/fs/xfs/libxfs/xfs_trans_resv.c
> @@ -196,6 +196,24 @@ xfs_calc_inode_chunk_res(
>  	return res;
>  }
>  
> +/*
> + * Per-extent log reservation for the btree changes involved in freeing or
> + * allocating a realtime extent.  We have to be able to log as many rtbitmap
> + * blocks as needed to mark inuse MAXEXTLEN blocks' worth of realtime extents,
> + * as well as the realtime summary block.
> + */
> +unsigned int
> +xfs_rtalloc_log_count(
> +	struct xfs_mount	*mp,
> +	unsigned int		num_ops)
> +{
> +	unsigned int		blksz = XFS_FSB_TO_B(mp, 1);
> +	unsigned int		rtbmp_bytes;
> +
> +	rtbmp_bytes = (MAXEXTLEN / mp->m_sb.sb_rextsize) / NBBY;
> +	return (howmany(rtbmp_bytes, blksz) + 1) * num_ops;
> +}
> +
>  /*
>   * Various log reservation values.
>   *
> @@ -218,13 +236,21 @@ xfs_calc_inode_chunk_res(
>  
>  /*
>   * In a write transaction we can allocate a maximum of 2
> - * extents.  This gives:
> + * extents.  This gives (t1):
>   *    the inode getting the new extents: inode size
>   *    the inode's bmap btree: max depth * block size
>   *    the agfs of the ags from which the extents are allocated: 2 * sector
>   *    the superblock free block counter: sector size
>   *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
> - * And the bmap_finish transaction can free bmap blocks in a join:
> + * Or, if we're writing to a realtime file (t2):
> + *    the inode getting the new extents: inode size
> + *    the inode's bmap btree: max depth * block size
> + *    the agfs of the ags from which the extents are allocated: 2 * sector
> + *    the superblock free block counter: sector size
> + *    the realtime bitmap: ((MAXEXTLEN / rtextsize) / NBBY) bytes
> + *    the realtime summary: 1 block
> + *    the allocation btrees: 2 trees * (2 * max depth - 1) * block size

Why do we include the allocation btrees in the rt reservations? I
thought that we'd either allocate (or free) out of one pool or the
other. Do we operate on both sets of structures in the same transaction?

Brian

> + * And the bmap_finish transaction can free bmap blocks in a join (t3):
>   *    the agfs of the ags containing the blocks: 2 * sector size
>   *    the agfls of the ags containing the blocks: 2 * sector size
>   *    the super block free block counter: sector size
> @@ -234,40 +260,72 @@ STATIC uint
>  xfs_calc_write_reservation(
>  	struct xfs_mount	*mp)
>  {
> -	return XFS_DQUOT_LOGRES(mp) +
> -		max((xfs_calc_inode_res(mp, 1) +
> +	unsigned int		t1, t2, t3;
> +	unsigned int		blksz = XFS_FSB_TO_B(mp, 1);
> +
> +	t1 = xfs_calc_inode_res(mp, 1) +
> +	     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), blksz) +
> +	     xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
> +	     xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz);
> +
> +	if (xfs_sb_version_hasrealtime(&mp->m_sb)) {
> +		t2 = xfs_calc_inode_res(mp, 1) +
>  		     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
> -				      XFS_FSB_TO_B(mp, 1)) +
> +				      blksz) +
>  		     xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
> -		     xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2),
> -				      XFS_FSB_TO_B(mp, 1))),
> -		    (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
> -		     xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2),
> -				      XFS_FSB_TO_B(mp, 1))));
> +		     xfs_calc_buf_res(xfs_rtalloc_log_count(mp, 1), blksz) +
> +		     xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), blksz);
> +	} else {
> +		t2 = 0;
> +	}
> +
> +	t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
> +	     xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz);
> +
> +	return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3);
>  }
>  
>  /*
> - * In truncating a file we free up to two extents at once.  We can modify:
> + * In truncating a file we free up to two extents at once.  We can modify (t1):
>   *    the inode being truncated: inode size
>   *    the inode's bmap btree: (max depth + 1) * block size
> - * And the bmap_finish transaction can free the blocks and bmap blocks:
> + * And the bmap_finish transaction can free the blocks and bmap blocks (t2):
>   *    the agf for each of the ags: 4 * sector size
>   *    the agfl for each of the ags: 4 * sector size
>   *    the super block to reflect the freed blocks: sector size
>   *    worst case split in allocation btrees per extent assuming 4 extents:
>   *		4 exts * 2 trees * (2 * max depth - 1) * block size
> + * Or, if it's a realtime file (t3):
> + *    the agf for each of the ags: 2 * sector size
> + *    the agfl for each of the ags: 2 * sector size
> + *    the super block to reflect the freed blocks: sector size
> + *    the realtime bitmap: 2 exts * ((MAXEXTLEN / rtextsize) / NBBY) bytes
> + *    the realtime summary: 2 exts * 1 block
> + *    worst case split in allocation btrees per extent assuming 2 extents:
> + *		2 exts * 2 trees * (2 * max depth - 1) * block size
>   */
>  STATIC uint
>  xfs_calc_itruncate_reservation(
>  	struct xfs_mount	*mp)
>  {
> -	return XFS_DQUOT_LOGRES(mp) +
> -		max((xfs_calc_inode_res(mp, 1) +
> -		     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1,
> -				      XFS_FSB_TO_B(mp, 1))),
> -		    (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
> -		     xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4),
> -				      XFS_FSB_TO_B(mp, 1))));
> +	unsigned int		t1, t2, t3;
> +	unsigned int		blksz = XFS_FSB_TO_B(mp, 1);
> +
> +	t1 = xfs_calc_inode_res(mp, 1) +
> +	     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1, blksz);
> +
> +	t2 = xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
> +	     xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4), blksz);
> +
> +	if (xfs_sb_version_hasrealtime(&mp->m_sb)) {
> +		t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
> +		     xfs_calc_buf_res(xfs_rtalloc_log_count(mp, 2), blksz) +
> +		     xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz);
> +	} else {
> +		t3 = 0;
> +	}
> +
> +	return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3);
>  }
>  
>  /*
> 




[Index of Archives]     [XFS Filesystem Development (older mail)]     [Linux Filesystem Development]     [Linux Audio Users]     [Yosemite Trails]     [Linux Kernel]     [Linux RAID]     [Linux SCSI]


  Powered by Linux