Re: [PATCH] xfs: fix xfs_reflink_allocate_cow_range

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Sun, Sep 25, 2016 at 12:30:02PM -0700, Christoph Hellwig wrote:
> The current version of xfs_reflink_allocate_cow_range may stumble over
> invalid entries in the extent array given that it drops the ilock but
> still expects the index to be stable.  Simple fixing it to a new lookup
> for every iteration still isn't correct given that xfs_bmapi_allocate
> will trigger a BUG_ON() if hitting a hole, and there is nothing
> preventing a xfs_bunmapi_cow call removing extents once we dropped the
> ilock either.
> 
> The right long term implementation would be to not do a detour through
> a delayed allocation for direct I/O and just got straight to and on-disk
> allocation.  Given how late it is in the merge window this patch instead
> duplicates the inner loop of xfs_bmapi_allocate into a helper for
> xfs_reflink_allocate_cow_range so that it can be done under the same
> ilock criticical section as our COW fork delayed allocation.

Looks ok too, so
Reviewed-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx>

--D

> 
> Signed-off-by: Christoph Hellwig <hch@xxxxxx>
> ---
>  fs/xfs/xfs_file.c    |   4 --
>  fs/xfs/xfs_reflink.c | 134 +++++++++++++++++++++++++++++++++------------------
>  fs/xfs/xfs_reflink.h |   4 +-
>  fs/xfs/xfs_trace.h   |   1 -
>  4 files changed, 90 insertions(+), 53 deletions(-)
> 
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index aee0c4c..349f328 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -675,10 +675,6 @@ xfs_file_dio_aio_write(
>  
>  	/* If this is a block-aligned directio CoW, remap immediately. */
>  	if (xfs_is_reflink_inode(ip) && !unaligned_io) {
> -		ret = xfs_reflink_reserve_cow_range(ip, iocb->ki_pos, count);
> -		if (ret)
> -			goto out;
> -
>  		ret = xfs_reflink_allocate_cow_range(ip, iocb->ki_pos, count);
>  		if (ret)
>  			goto out;
> diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
> index 09e0e27..c18692c 100644
> --- a/fs/xfs/xfs_reflink.c
> +++ b/fs/xfs/xfs_reflink.c
> @@ -253,7 +253,8 @@ static int
>  __xfs_reflink_reserve_cow(
>  	struct xfs_inode	*ip,
>  	xfs_fileoff_t		*offset_fsb,
> -	xfs_fileoff_t		end_fsb)
> +	xfs_fileoff_t		end_fsb,
> +	bool			*skipped)
>  {
>  	struct xfs_bmbt_irec	got, prev, imap;
>  	xfs_fileoff_t		orig_end_fsb;
> @@ -287,8 +288,10 @@ __xfs_reflink_reserve_cow(
>  	end_fsb = orig_end_fsb = imap.br_startoff + imap.br_blockcount;
>  
>  	/* Not shared?  Just report the (potentially capped) extent. */
> -	if (!shared)
> +	if (!shared) {
> +		*skipped = true;
>  		goto done;
> +	}
>  
>  	/*
>  	 * Fork all the shared blocks from our write offset until the end of
> @@ -337,6 +340,7 @@ xfs_reflink_reserve_cow_range(
>  {
>  	struct xfs_mount	*mp = ip->i_mount;
>  	xfs_fileoff_t		offset_fsb, end_fsb;
> +	bool			skipped = false;
>  	int			error;
>  
>  	trace_xfs_reflink_reserve_cow_range(ip, offset, count);
> @@ -346,7 +350,8 @@ xfs_reflink_reserve_cow_range(
>  
>  	xfs_ilock(ip, XFS_ILOCK_EXCL);
>  	while (offset_fsb < end_fsb) {
> -		error = __xfs_reflink_reserve_cow(ip, &offset_fsb, end_fsb);
> +		error = __xfs_reflink_reserve_cow(ip, &offset_fsb, end_fsb,
> +				&skipped);
>  		if (error) {
>  			trace_xfs_reflink_reserve_cow_range_error(ip, error,
>  				_RET_IP_);
> @@ -358,63 +363,100 @@ xfs_reflink_reserve_cow_range(
>  	return error;
>  }
>  
> -/*
> - * Allocate blocks to all CoW reservations within a byte range of a file.
> - */
> -int
> -xfs_reflink_allocate_cow_range(
> +static int
> +__xfs_reflink_allocate_cow(
>  	struct xfs_inode	*ip,
> -	xfs_off_t		pos,
> -	xfs_off_t		len)
> +	xfs_fileoff_t		*offset_fsb,
> +	xfs_fileoff_t		end_fsb)
>  {
> -	struct xfs_ifork	*ifp;
> -	struct xfs_bmbt_rec_host	*gotp;
> +	struct xfs_mount	*mp = ip->i_mount;
>  	struct xfs_bmbt_irec	imap;
> -	int			error = 0;
> -	xfs_fileoff_t		start_lblk;
> -	xfs_fileoff_t		end_lblk;
> -	xfs_extnum_t		idx;
> +	struct xfs_defer_ops	dfops;
> +	struct xfs_trans	*tp;
> +	xfs_fsblock_t		first_block;
> +	xfs_fileoff_t		next_fsb;
> +	int			nimaps = 1, error;
> +	bool			skipped = false;
>  
> -	if (!xfs_is_reflink_inode(ip))
> -		return 0;
> +	xfs_defer_init(&dfops, &first_block);
>  
> -	trace_xfs_reflink_allocate_cow_range(ip, len, pos);
> +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0,
> +			XFS_TRANS_RESERVE, &tp);
> +	if (error)
> +		return error;
>  
> -	start_lblk = XFS_B_TO_FSBT(ip->i_mount, pos);
> -	end_lblk = XFS_B_TO_FSB(ip->i_mount, pos + len);
> -	ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
>  	xfs_ilock(ip, XFS_ILOCK_EXCL);
>  
> -	gotp = xfs_iext_bno_to_ext(ifp, start_lblk, &idx);
> -	while (gotp) {
> -		xfs_bmbt_get_all(gotp, &imap);
> +	next_fsb = *offset_fsb;
> +	error = __xfs_reflink_reserve_cow(ip, &next_fsb, end_fsb, &skipped);
> +	if (error)
> +		goto out_trans_cancel;
>  
> -		if (imap.br_startoff >= end_lblk)
> -			break;
> -		if (!isnullstartblock(imap.br_startblock))
> -			goto advloop;
> -		xfs_trim_extent(&imap, start_lblk, end_lblk - start_lblk);
> -		trace_xfs_reflink_allocate_cow_extent(ip, &imap);
> -
> -		xfs_iunlock(ip, XFS_ILOCK_EXCL);
> -		error = xfs_iomap_write_allocate(ip, XFS_COW_FORK,
> -				XFS_FSB_TO_B(ip->i_mount, imap.br_startoff +
> -						imap.br_blockcount - 1), &imap);
> -		xfs_ilock(ip, XFS_ILOCK_EXCL);
> -		if (error)
> -			break;
> -advloop:
> -		/* Roll on... */
> -		idx++;
> -		if (idx >= ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
> -			break;
> -		gotp = xfs_iext_get_ext(ifp, idx);
> +	if (skipped) {
> +		*offset_fsb = next_fsb;
> +		goto out_trans_cancel;
>  	}
>  
> +	xfs_trans_ijoin(tp, ip, 0);
> +	error = xfs_bmapi_write(tp, ip, *offset_fsb, next_fsb - *offset_fsb,
> +			XFS_BMAPI_COWFORK, &first_block,
> +			XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK),
> +			&imap, &nimaps, &dfops);
> +	if (error)
> +		goto out_trans_cancel;
> +
> +	/* We might not have been able to map the whole delalloc extent */
> +	*offset_fsb = min(*offset_fsb + imap.br_blockcount, next_fsb);
> +
> +	error = xfs_defer_finish(&tp, &dfops, NULL);
> +	if (error)
> +		goto out_trans_cancel;
> +
> +	error = xfs_trans_commit(tp);
> +
> +out_unlock:
>  	xfs_iunlock(ip, XFS_ILOCK_EXCL);
> +	return error;
> +out_trans_cancel:
> +	xfs_defer_cancel(&dfops);
> +	xfs_trans_cancel(tp);
> +	goto out_unlock;
> +}
>  
> +/*
> + * Allocate blocks to all CoW reservations within a byte range of a file.
> + */
> +int
> +xfs_reflink_allocate_cow_range(
> +	struct xfs_inode	*ip,
> +	xfs_off_t		offset,
> +	xfs_off_t		count)
> +{
> +	struct xfs_mount	*mp = ip->i_mount;
> +	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
> +	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, offset + count);
> +	int			error;
> +
> +	ASSERT(xfs_is_reflink_inode(ip));
> +
> +	trace_xfs_reflink_allocate_cow_range(ip, offset, count);
> +
> +	/*
> +	 * Make sure that the dquots are there.
> +	 */
> +	error = xfs_qm_dqattach(ip, 0);
>  	if (error)
> -		trace_xfs_reflink_allocate_cow_range_error(ip, error, _RET_IP_);
> +		return error;
> +
> +	while (offset_fsb < end_fsb) {
> +		error = __xfs_reflink_allocate_cow(ip, &offset_fsb, end_fsb);
> +		if (error) {
> +			trace_xfs_reflink_allocate_cow_range_error(ip, error,
> +					_RET_IP_);
> +			break;
> +		}
> +	}
> +
>  	return error;
>  }
>  
> diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
> index 0e19ec6..859ca50 100644
> --- a/fs/xfs/xfs_reflink.h
> +++ b/fs/xfs/xfs_reflink.h
> @@ -28,8 +28,8 @@ extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip,
>  
>  extern int xfs_reflink_reserve_cow_range(struct xfs_inode *ip,
>  		xfs_off_t offset, xfs_off_t count);
> -extern int xfs_reflink_allocate_cow_range(struct xfs_inode *ip, xfs_off_t pos,
> -		xfs_off_t len);
> +extern int xfs_reflink_allocate_cow_range(struct xfs_inode *ip,
> +		xfs_off_t offset, xfs_off_t count);
>  extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset,
>  		struct xfs_bmbt_irec *imap, bool *need_alloc);
>  extern int xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip,
> diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
> index c8fb91c..26113b9 100644
> --- a/fs/xfs/xfs_trace.h
> +++ b/fs/xfs/xfs_trace.h
> @@ -3320,7 +3320,6 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc);
>  
>  DEFINE_RW_EVENT(xfs_reflink_reserve_cow_range);
>  DEFINE_RW_EVENT(xfs_reflink_allocate_cow_range);
> -DEFINE_INODE_IREC_EVENT(xfs_reflink_allocate_cow_extent);
>  
>  DEFINE_INODE_IREC_EVENT(xfs_reflink_bounce_dio_write);
>  DEFINE_IOMAP_EVENT(xfs_reflink_find_cow_mapping);
> -- 
> 2.1.4
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [XFS Filesystem Development (older mail)]     [Linux Filesystem Development]     [Linux Audio Users]     [Yosemite Trails]     [Linux Kernel]     [Linux RAID]     [Linux SCSI]


  Powered by Linux