Re: [PATCH 05/14] xfs: introduce BMAPI_ZERO for allocating zeroed extents

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Feb 15, 2016 at 05:18:16PM +1100, Dave Chinner wrote:
> From: Dave Chinner <dchinner@xxxxxxxxxx>
> 
> Source kernel commit 3fbbbea34bac049c0b5938dc065f7d8ee1ef7e67
> 
> To enable DAX to do atomic allocation of zeroed extents, we need to
> drive the block zeroing deep into the allocator. Because
> xfs_bmapi_write() can return merged extents on allocation that were
> only partially allocated (i.e. requested range spans allocated and
> hole regions, allocation into the hole was contiguous), we cannot
> zero the extent returned from xfs_bmapi_write() as that can
> overwrite existing data with zeros.
> 
> Hence we have to drive the extent zeroing into the allocation code,
> prior to where we merge the extents into the BMBT and return the
> resultant map. This means we need to propagate this need down to
> the xfs_alloc_vextent() and issue the block zeroing at this point.
> 
> While this functionality is being introduced for DAX, there is no
> reason why it is specific to DAX - we can per-zero blocks during the
> allocation transaction on any type of device. It's just slow (and
> usually slower than unwritten allocation and conversion) on
> traditional block devices so doesn't tend to get used. We can,
> however, hook hardware zeroing optimisations via sb_issue_zeroout()
> to this operation, so it may be useful in future and hence the
> "allocate zeroed blocks" API needs to be implementation neutral.
> 
> Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
> Reviewed-by: Brian Foster <bfoster@xxxxxxxxxx>
> Signed-off-by: Dave Chinner <david@xxxxxxxxxxxxx>
> ---
>  include/libxfs.h         |  1 -
>  libxfs/libxfs_api_defs.h |  1 +
>  libxfs/libxfs_io.h       |  2 ++
>  libxfs/libxfs_priv.h     |  3 +++
>  libxfs/rdwr.c            |  4 +++-
>  libxfs/util.c            | 35 +++++++++++++++++++++++++++++++++++
>  libxfs/xfs_alloc.c       | 10 +++++++++-
>  libxfs/xfs_alloc.h       |  8 +++++---
>  libxfs/xfs_bmap.c        | 35 +++++++++++++++++++++++++++++++++--
>  libxfs/xfs_bmap.h        | 13 +++++++++++--
>  10 files changed, 102 insertions(+), 10 deletions(-)
> 
...
> diff --git a/libxfs/util.c b/libxfs/util.c
> index 90031fd..ee4bf3c 100644
> --- a/libxfs/util.c
> +++ b/libxfs/util.c
...
> @@ -770,3 +772,36 @@ xfs_log_check_lsn(
...
> +int
> +libxfs_zero_extent(
> +	struct xfs_inode *ip,
> +	xfs_fsblock_t	start_fsb,
> +	xfs_off_t	count_fsb)
> +{
> +	xfs_daddr_t	sector = xfs_fsb_to_db(ip, start_fsb);
> +	ssize_t		size = XFS_FSB_TO_BB(ip->i_mount, count_fsb);
> +
> +	return libxfs_device_zero(xfs_find_bdev_for_inode(ip), sector, size);
> +}
> +

Extra whitespace ^, otherwise looks good:

Reviewed-by: Brian Foster <bfoster@xxxxxxxxxx>

> diff --git a/libxfs/xfs_alloc.c b/libxfs/xfs_alloc.c
> index 12d59df..af40270 100644
> --- a/libxfs/xfs_alloc.c
> +++ b/libxfs/xfs_alloc.c
> @@ -2508,7 +2508,7 @@ xfs_alloc_vextent(
>  		 * Try near allocation first, then anywhere-in-ag after
>  		 * the first a.g. fails.
>  		 */
> -		if ((args->userdata  == XFS_ALLOC_INITIAL_USER_DATA) &&
> +		if ((args->userdata & XFS_ALLOC_INITIAL_USER_DATA) &&
>  		    (mp->m_flags & XFS_MOUNT_32BITINODES)) {
>  			args->fsbno = XFS_AGB_TO_FSB(mp,
>  					((mp->m_agfrotor / rotorstep) %
> @@ -2639,6 +2639,14 @@ xfs_alloc_vextent(
>  		XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
>  			args->len);
>  #endif
> +
> +		/* Zero the extent if we were asked to do so */
> +		if (args->userdata & XFS_ALLOC_USERDATA_ZERO) {
> +			error = xfs_zero_extent(args->ip, args->fsbno, args->len);
> +			if (error)
> +				goto error0;
> +		}
> +
>  	}
>  	xfs_perag_put(args->pag);
>  	return 0;
> diff --git a/libxfs/xfs_alloc.h b/libxfs/xfs_alloc.h
> index 071b28b..135eb3d 100644
> --- a/libxfs/xfs_alloc.h
> +++ b/libxfs/xfs_alloc.h
> @@ -101,6 +101,7 @@ typedef struct xfs_alloc_arg {
>  	struct xfs_mount *mp;		/* file system mount point */
>  	struct xfs_buf	*agbp;		/* buffer for a.g. freelist header */
>  	struct xfs_perag *pag;		/* per-ag struct for this agno */
> +	struct xfs_inode *ip;		/* for userdata zeroing method */
>  	xfs_fsblock_t	fsbno;		/* file system block number */
>  	xfs_agnumber_t	agno;		/* allocation group number */
>  	xfs_agblock_t	agbno;		/* allocation group-relative block # */
> @@ -120,15 +121,16 @@ typedef struct xfs_alloc_arg {
>  	char		wasdel;		/* set if allocation was prev delayed */
>  	char		wasfromfl;	/* set if allocation is from freelist */
>  	char		isfl;		/* set if is freelist blocks - !acctg */
> -	char		userdata;	/* set if this is user data */
> +	char		userdata;	/* mask defining userdata treatment */
>  	xfs_fsblock_t	firstblock;	/* io first block allocated */
>  } xfs_alloc_arg_t;
>  
>  /*
>   * Defines for userdata
>   */
> -#define XFS_ALLOC_USERDATA		1	/* allocation is for user data*/
> -#define XFS_ALLOC_INITIAL_USER_DATA	2	/* special case start of file */
> +#define XFS_ALLOC_USERDATA		(1 << 0)/* allocation is for user data*/
> +#define XFS_ALLOC_INITIAL_USER_DATA	(1 << 1)/* special case start of file */
> +#define XFS_ALLOC_USERDATA_ZERO		(1 << 2)/* zero extent on allocation */
>  
>  xfs_extlen_t xfs_alloc_longest_free_extent(struct xfs_mount *mp,
>  		struct xfs_perag *pag, xfs_extlen_t need);
> diff --git a/libxfs/xfs_bmap.c b/libxfs/xfs_bmap.c
> index 8e19b50..a38549c 100644
> --- a/libxfs/xfs_bmap.c
> +++ b/libxfs/xfs_bmap.c
> @@ -3795,8 +3795,13 @@ xfs_bmap_btalloc(
>  	args.wasdel = ap->wasdel;
>  	args.isfl = 0;
>  	args.userdata = ap->userdata;
> -	if ((error = xfs_alloc_vextent(&args)))
> +	if (ap->userdata & XFS_ALLOC_USERDATA_ZERO)
> +		args.ip = ap->ip;
> +
> +	error = xfs_alloc_vextent(&args);
> +	if (error)
>  		return error;
> +
>  	if (tryagain && args.fsbno == NULLFSBLOCK) {
>  		/*
>  		 * Exact allocation failed. Now try with alignment
> @@ -4295,11 +4300,14 @@ xfs_bmapi_allocate(
>  
>  	/*
>  	 * Indicate if this is the first user data in the file, or just any
> -	 * user data.
> +	 * user data. And if it is userdata, indicate whether it needs to
> +	 * be initialised to zero during allocation.
>  	 */
>  	if (!(bma->flags & XFS_BMAPI_METADATA)) {
>  		bma->userdata = (bma->offset == 0) ?
>  			XFS_ALLOC_INITIAL_USER_DATA : XFS_ALLOC_USERDATA;
> +		if (bma->flags & XFS_BMAPI_ZERO)
> +			bma->userdata |= XFS_ALLOC_USERDATA_ZERO;
>  	}
>  
>  	bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? bma->length : 1;
> @@ -4414,6 +4422,17 @@ xfs_bmapi_convert_unwritten(
>  	mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
>  				? XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
>  
> +	/*
> +	 * Before insertion into the bmbt, zero the range being converted
> +	 * if required.
> +	 */
> +	if (flags & XFS_BMAPI_ZERO) {
> +		error = xfs_zero_extent(bma->ip, mval->br_startblock,
> +					mval->br_blockcount);
> +		if (error)
> +			return error;
> +	}
> +
>  	error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx,
>  			&bma->cur, mval, bma->firstblock, bma->flist,
>  			&tmp_logflags);
> @@ -4507,6 +4526,18 @@ xfs_bmapi_write(
>  	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL);
>  	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
>  
> +	/* zeroing is for currently only for data extents, not metadata */
> +	ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) !=
> +			(XFS_BMAPI_METADATA | XFS_BMAPI_ZERO));
> +	/*
> +	 * we can allocate unwritten extents or pre-zero allocated blocks,
> +	 * but it makes no sense to do both at once. This would result in
> +	 * zeroing the unwritten extent twice, but it still being an
> +	 * unwritten extent....
> +	 */
> +	ASSERT((flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)) !=
> +			(XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO));
> +
>  	if (unlikely(XFS_TEST_ERROR(
>  	    (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
>  	     XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
> diff --git a/libxfs/xfs_bmap.h b/libxfs/xfs_bmap.h
> index d3daf6d..baec27d 100644
> --- a/libxfs/xfs_bmap.h
> +++ b/libxfs/xfs_bmap.h
> @@ -52,9 +52,9 @@ struct xfs_bmalloca {
>  	xfs_extlen_t		minleft; /* amount must be left after alloc */
>  	bool			eof;	/* set if allocating past last extent */
>  	bool			wasdel;	/* replacing a delayed allocation */
> -	bool			userdata;/* set if is user data */
>  	bool			aeof;	/* allocated space at eof */
>  	bool			conv;	/* overwriting unwritten extents */
> +	char			userdata;/* userdata mask */
>  	int			flags;
>  };
>  
> @@ -109,6 +109,14 @@ typedef	struct xfs_bmap_free
>   */
>  #define XFS_BMAPI_CONVERT	0x040
>  
> +/*
> + * allocate zeroed extents - this requires all newly allocated user data extents
> + * to be initialised to zero. It will be ignored if XFS_BMAPI_METADATA is set.
> + * Use in conjunction with XFS_BMAPI_CONVERT to convert unwritten extents found
> + * during the allocation range to zeroed written extents.
> + */
> +#define XFS_BMAPI_ZERO		0x080
> +
>  #define XFS_BMAPI_FLAGS \
>  	{ XFS_BMAPI_ENTIRE,	"ENTIRE" }, \
>  	{ XFS_BMAPI_METADATA,	"METADATA" }, \
> @@ -116,7 +124,8 @@ typedef	struct xfs_bmap_free
>  	{ XFS_BMAPI_PREALLOC,	"PREALLOC" }, \
>  	{ XFS_BMAPI_IGSTATE,	"IGSTATE" }, \
>  	{ XFS_BMAPI_CONTIG,	"CONTIG" }, \
> -	{ XFS_BMAPI_CONVERT,	"CONVERT" }
> +	{ XFS_BMAPI_CONVERT,	"CONVERT" }, \
> +	{ XFS_BMAPI_ZERO,	"ZERO" }
>  
>  
>  static inline int xfs_bmapi_aflag(int w)
> -- 
> 2.5.0
> 
> _______________________________________________
> xfs mailing list
> xfs@xxxxxxxxxxx
> http://oss.sgi.com/mailman/listinfo/xfs

_______________________________________________
xfs mailing list
xfs@xxxxxxxxxxx
http://oss.sgi.com/mailman/listinfo/xfs



[Index of Archives]     [Linux XFS Devel]     [Linux Filesystem Development]     [Filesystem Testing]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux