Re: [PATCH 4/3] xfs: EOF blocks are not busy extents

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Feb 18, 2019 at 01:26:19PM +1100, Dave Chinner wrote:
> From: Dave Chinner <dchinner@xxxxxxxxxx>
> 
> Userdata extents are considered as "busy extents" when they are
> freed to ensure that they ar enot reallocated and written to before
> the transaction that frees the user data extent has been committed
> to disk.
> 
> However, in the case of post EOF blocks, these block have
> never been exposed to user applications and so don't contain valid
> data. Hence they don't need to be considered "busy" when they've
> been freed because there is no data in them that can be destroyed
> if they are reallocated and have data written to them before the
> free transaction is committed.
> 
> We already have XFS_BMAPI_NODISCARD to extent freeing that the data

"to extent freeing" ?

> extent has never been used so it doesn't need discards issued on it.
> This new functionality is just an extension of that concept - the
> extent is actually unused, so doesn't even need to be marked busy.
> 
> Hence fix this by adding XFS_BMAPI_UNUSED and update the EOF block
> data extent truncate with XFS_BMAPI_UNUSED and propagate that all
> the way through the various structures an use it to avoid inserting
> the extent into the busy list.
> 
> [ Note: this seems like a bit of a hack, but I just don't see the
> point of inserting it into the busy list, then adding a new busy
> list unused flag, then allowing every allocation type to use it in
> the _trim code, then have every caller have to call _reuse to remove
> the range from the busy tree. It just seems like complexity that
> doesn't need to exist because anyone can reallocate an
> unused extent for immediate use. ]
> 

Yeah, I'm not a fan of adding that kind of noop code to achieve behavior
that's equivalent to bypassing subsystems or removing unnecessary code.

> This avoids the problem of free space fragmentation when multiple
> files are written sequentially  via synchronous writes or post-write
> fsync calls before the next file is written. This results in the
> post-eof blocks being marked busy and can't be immediately
> reallocated resulting in the files packing poorly and unnecessarily
> leaving free space between them.
> 
> Freespace fragmentation from sequential multi-file synchronous write
> workload:
> 
> Before:
>   from      to extents  blocks    pct
>       1       1       7       7   0.00
>       2       3      34      80   0.00
>       4       7      65     345   0.01
>       8      15     208    2417   0.05
>      16      31     147    2982   0.06
>      32      63       1      49   0.00
> 1048576 1310720       4 5185064  99.89
> 
> After:
>    from      to extents  blocks    pct
>       1       1       3       3   0.00
>       2       3       1       3   0.00
> 1048576 1310720       4 5190871 100.00
> 
> Much better.
> 
> Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
> ---

This seems reasonable to me in principle. Some comments...

>  fs/xfs/libxfs/xfs_alloc.c  | 7 +++----
>  fs/xfs/libxfs/xfs_bmap.c   | 6 +++---
>  fs/xfs/libxfs/xfs_bmap.h   | 8 ++++++--
>  fs/xfs/xfs_bmap_util.c     | 2 +-
>  fs/xfs/xfs_trans_extfree.c | 6 +++---
>  5 files changed, 16 insertions(+), 13 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
> index 659bb9133955..729136ef0ed1 100644
> --- a/fs/xfs/libxfs/xfs_alloc.c
> +++ b/fs/xfs/libxfs/xfs_alloc.c
> @@ -3014,7 +3014,7 @@ __xfs_free_extent(
>  	xfs_extlen_t			len,
>  	const struct xfs_owner_info	*oinfo,
>  	enum xfs_ag_resv_type		type,
> -	bool				skip_discard)
> +	bool				unused)
>  {
>  	struct xfs_mount		*mp = tp->t_mountp;
>  	struct xfs_buf			*agbp;
> @@ -3045,9 +3045,8 @@ __xfs_free_extent(
>  	if (error)
>  		goto err;
>  
> -	if (skip_discard)
> -		busy_flags |= XFS_EXTENT_BUSY_SKIP_DISCARD;
> -	xfs_extent_busy_insert(tp, agno, agbno, len, busy_flags);
> +	if (!unused)
> +		xfs_extent_busy_insert(tp, agno, agbno, len, busy_flags);

busy_flags now serves no purpose in this function (it's hardcoded to
zero).

>  	return 0;
>  
>  err:
> diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
> index 332eefa2700b..ba0fd80eede4 100644
> --- a/fs/xfs/libxfs/xfs_bmap.c
> +++ b/fs/xfs/libxfs/xfs_bmap.c
> @@ -537,7 +537,7 @@ __xfs_bmap_add_free(
>  	xfs_fsblock_t			bno,
>  	xfs_filblks_t			len,
>  	const struct xfs_owner_info	*oinfo,
> -	bool				skip_discard)
> +	bool				unused)
>  {
>  	struct xfs_extent_free_item	*new;		/* new element */
>  #ifdef DEBUG
> @@ -565,7 +565,7 @@ __xfs_bmap_add_free(
>  		new->xefi_oinfo = *oinfo;
>  	else
>  		new->xefi_oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
> -	new->xefi_skip_discard = skip_discard;
> +	new->xefi_unused = unused;
>  	trace_xfs_bmap_free_defer(tp->t_mountp,
>  			XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0,
>  			XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len);
> @@ -5069,7 +5069,7 @@ xfs_bmap_del_extent_real(
>  		} else {
>  			__xfs_bmap_add_free(tp, del->br_startblock,
>  					del->br_blockcount, NULL,
> -					(bflags & XFS_BMAPI_NODISCARD) ||
> +					(bflags & XFS_BMAPI_UNUSED) ||
>  					del->br_state == XFS_EXT_UNWRITTEN);

Note that this does technically change unrelated behavior as we now
consider unwritten extents as unused (along with post-eof blocks). I
think that's still fine for similar reasons as for eofblocks, but this
should at least be noted in the commit log.

>  		}
>  	}
> diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
> index 09d3ea97cc15..33fb95f84ea0 100644
> --- a/fs/xfs/libxfs/xfs_bmap.h
> +++ b/fs/xfs/libxfs/xfs_bmap.h
> @@ -54,7 +54,7 @@ struct xfs_extent_free_item
>  	xfs_extlen_t		xefi_blockcount;/* number of blocks in extent */
>  	struct list_head	xefi_list;
>  	struct xfs_owner_info	xefi_oinfo;	/* extent owner */
> -	bool			xefi_skip_discard;
> +	bool			xefi_unused;
>  };
>  
>  #define	XFS_BMAP_MAX_NMAP	4
> @@ -107,6 +107,9 @@ struct xfs_extent_free_item
>  /* Do not update the rmap btree.  Used for reconstructing bmbt from rmapbt. */
>  #define XFS_BMAPI_NORMAP	0x2000
>  
> +/* Unused freed data extent, no need to mark busy */
> +#define XFS_BMAPI_UNUSED	0x4000
> +
>  #define XFS_BMAPI_FLAGS \
>  	{ XFS_BMAPI_ENTIRE,	"ENTIRE" }, \
>  	{ XFS_BMAPI_METADATA,	"METADATA" }, \
> @@ -120,7 +123,8 @@ struct xfs_extent_free_item
>  	{ XFS_BMAPI_DELALLOC,	"DELALLOC" }, \
>  	{ XFS_BMAPI_CONVERT_ONLY, "CONVERT_ONLY" }, \
>  	{ XFS_BMAPI_NODISCARD,	"NODISCARD" }, \
> -	{ XFS_BMAPI_NORMAP,	"NORMAP" }
> +	{ XFS_BMAPI_NORMAP,	"NORMAP" }, \
> +	{ XFS_BMAPI_UNUSED,	"UNUSED" }

We can kill XFS_BMAPI_NODISCARD since _UNUSED replaces the only usage of
it. I suppose we might as well just rename it as you've done for the
related plumbing.

Brian

>  
>  
>  static inline int xfs_bmapi_aflag(int w)
> diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
> index af2e30d33794..f5a8a4385512 100644
> --- a/fs/xfs/xfs_bmap_util.c
> +++ b/fs/xfs/xfs_bmap_util.c
> @@ -841,7 +841,7 @@ xfs_free_eofblocks(
>  		 * may be full of holes (ie NULL files bug).
>  		 */
>  		error = xfs_itruncate_extents_flags(&tp, ip, XFS_DATA_FORK,
> -					XFS_ISIZE(ip), XFS_BMAPI_NODISCARD);
> +					XFS_ISIZE(ip), XFS_BMAPI_UNUSED);
>  		if (error) {
>  			/*
>  			 * If we get an error at this point we simply don't
> diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c
> index 0710434eb240..d06fb2cd6ffb 100644
> --- a/fs/xfs/xfs_trans_extfree.c
> +++ b/fs/xfs/xfs_trans_extfree.c
> @@ -58,7 +58,7 @@ xfs_trans_free_extent(
>  	xfs_fsblock_t			start_block,
>  	xfs_extlen_t			ext_len,
>  	const struct xfs_owner_info	*oinfo,
> -	bool				skip_discard)
> +	bool				unused)
>  {
>  	struct xfs_mount		*mp = tp->t_mountp;
>  	struct xfs_extent		*extp;
> @@ -71,7 +71,7 @@ xfs_trans_free_extent(
>  	trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, ext_len);
>  
>  	error = __xfs_free_extent(tp, start_block, ext_len,
> -				  oinfo, XFS_AG_RESV_NONE, skip_discard);
> +				  oinfo, XFS_AG_RESV_NONE, unused);
>  	/*
>  	 * Mark the transaction dirty, even on error. This ensures the
>  	 * transaction is aborted, which:
> @@ -184,7 +184,7 @@ xfs_extent_free_finish_item(
>  	error = xfs_trans_free_extent(tp, done_item,
>  			free->xefi_startblock,
>  			free->xefi_blockcount,
> -			&free->xefi_oinfo, free->xefi_skip_discard);
> +			&free->xefi_oinfo, free->xefi_unused);
>  	kmem_free(free);
>  	return error;
>  }



[Index of Archives]     [XFS Filesystem Development (older mail)]     [Linux Filesystem Development]     [Linux Audio Users]     [Yosemite Trails]     [Linux Kernel]     [Linux RAID]     [Linux SCSI]


  Powered by Linux