Re: [PATCH 41/63] xfs: reflink extents from one file to another

Brian Foster <bfoster@xxxxxxxxxx> · Fri, 7 Oct 2016 14:04:15 -0400

On Thu, Sep 29, 2016 at 08:10:05PM -0700, Darrick J. Wong wrote:
> Reflink extents from one file to another; that is to say, iteratively
> remove the mappings from the destination file, copy the mappings from
> the source file to the destination file, and increment the reference
> count of all the blocks that got remapped.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
> ---
> v2: Call xfs_defer_cancel before cancelling the transaction if the
> remap operation fails.  Use the deferred operations system to avoid
> deadlocks or blowing out the transaction reservation, and make the
> entire reflink operation atomic for each extent being remapped.  The
> destination file's i_size will be updated if necessary to avoid
> violating the assumption that there are no shared blocks past the EOF
> block.
> ---
>  fs/xfs/xfs_reflink.c |  425 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  fs/xfs/xfs_reflink.h |    2 
>  2 files changed, 427 insertions(+)
> 
> 
> diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
> index 673ecc1..94c19fff 100644
> --- a/fs/xfs/xfs_reflink.c
> +++ b/fs/xfs/xfs_reflink.c
> @@ -922,3 +922,428 @@ xfs_reflink_recover_cow(
>  
>  	return error;
>  }
...
> +/*
> + * Unmap a range of blocks from a file, then map other blocks into the hole.
> + * The range to unmap is (destoff : irec->br_startoff + irec->br_blockcount).
> + * The extent irec is mapped into dest at irec->br_startoff.
> + */
> +STATIC int
> +xfs_reflink_remap_extent(
> +	struct xfs_inode	*ip,
> +	struct xfs_bmbt_irec	*irec,
> +	xfs_fileoff_t		destoff,
> +	xfs_off_t		new_isize)
> +{
> +	struct xfs_mount	*mp = ip->i_mount;
> +	struct xfs_trans	*tp;
> +	xfs_fsblock_t		firstfsb;
> +	unsigned int		resblks;
> +	struct xfs_defer_ops	dfops;
> +	struct xfs_bmbt_irec	uirec;
> +	bool			real_extent;
> +	xfs_filblks_t		rlen;
> +	xfs_filblks_t		unmap_len;
> +	xfs_off_t		newlen;
> +	int			error;
> +
> +	unmap_len = irec->br_startoff + irec->br_blockcount - destoff;
> +	trace_xfs_reflink_punch_range(ip, destoff, unmap_len);
> +
> +	/* Only remap normal extents. */
> +	real_extent =  (irec->br_startblock != HOLESTARTBLOCK &&
> +			irec->br_startblock != DELAYSTARTBLOCK &&
> +			!ISUNWRITTEN(irec));
> +
> +	/* Start a rolling transaction to switch the mappings */
> +	resblks = XFS_EXTENTADD_SPACE_RES(ip->i_mount, XFS_DATA_FORK);
> +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
> +	if (error)
> +		goto out;
> +
> +	xfs_ilock(ip, XFS_ILOCK_EXCL);
> +	xfs_trans_ijoin(tp, ip, 0);
> +
> +	/* If we're not just clearing space, then do we have enough quota? */
> +	if (real_extent) {
> +		error = xfs_trans_reserve_quota_nblks(tp, ip,
> +				irec->br_blockcount, 0, XFS_QMOPT_RES_REGBLKS);
> +		if (error)
> +			goto out_cancel;
> +	}
> +
> +	trace_xfs_reflink_remap(ip, irec->br_startoff,
> +				irec->br_blockcount, irec->br_startblock);
> +
> +	/* Unmap the old blocks in the data fork. */
> +	rlen = unmap_len;
> +	while (rlen) {
> +		xfs_defer_init(&dfops, &firstfsb);
> +		error = __xfs_bunmapi(tp, ip, destoff, &rlen, 0, 1,
> +				&firstfsb, &dfops);
> +		if (error)
> +			goto out_defer;
> +
> +		/* Trim the extent to whatever got unmapped. */
> +		uirec = *irec;
> +		xfs_trim_extent(&uirec, destoff + rlen, unmap_len - rlen);
> +		unmap_len = rlen;
> +
> +		/* If this isn't a real mapping, we're done. */
> +		if (!real_extent || uirec.br_blockcount == 0)
> +			goto next_extent;
> +

Any reason we couldn't reuse existing mechanisms for this? E.g., hole
punch the dest file range before we remap the source file extents. That
might change behavior in the event of a partial/failed reflink, but it's
not clear to me that matters.

> +		trace_xfs_reflink_remap(ip, uirec.br_startoff,
> +				uirec.br_blockcount, uirec.br_startblock);
> +
...
> +}
> +
> +/*
> + * Iteratively remap one file's extents (and holes) to another's.
> + */
> +STATIC int
> +xfs_reflink_remap_blocks(
> +	struct xfs_inode	*src,
> +	xfs_fileoff_t		srcoff,
> +	struct xfs_inode	*dest,
> +	xfs_fileoff_t		destoff,
> +	xfs_filblks_t		len,
> +	xfs_off_t		new_isize)
> +{
> +	struct xfs_bmbt_irec	imap;
> +	int			nimaps;
> +	int			error = 0;
> +	xfs_filblks_t		range_len;
> +
> +	/* drange = (destoff, destoff + len); srange = (srcoff, srcoff + len) */
> +	while (len) {
> +		trace_xfs_reflink_remap_blocks_loop(src, srcoff, len,
> +				dest, destoff);
> +		/* Read extent from the source file */
> +		nimaps = 1;
> +		xfs_ilock(src, XFS_ILOCK_EXCL);
> +		error = xfs_bmapi_read(src, srcoff, len, &imap, &nimaps, 0);
> +		xfs_iunlock(src, XFS_ILOCK_EXCL);
> +		if (error)
> +			goto err;
> +		ASSERT(nimaps == 1);
> +
> +		trace_xfs_reflink_remap_imap(src, srcoff, len, XFS_IO_OVERWRITE,
> +				&imap);
> +
> +		/* Translate imap into the destination file. */
> +		range_len = imap.br_startoff + imap.br_blockcount - srcoff;
> +		imap.br_startoff += destoff - srcoff;
> +

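Just FYI... these are all unsigned vars, so (destoff - srcoff) wraps
around whenever destoff < srcoff. The += still lands on the right offset
thanks to modular arithmetic, but that's not obvious from the code.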

Brian

> +		/* Clear dest from destoff to the end of imap and map it in. */
> +		error = xfs_reflink_remap_extent(dest, &imap, destoff,
> +				new_isize);
> +		if (error)
> +			goto err;
> +
> +		if (fatal_signal_pending(current)) {
> +			error = -EINTR;
> +			goto err;
> +		}
> +
> +		/* Advance drange/srange */
> +		srcoff += range_len;
> +		destoff += range_len;
> +		len -= range_len;
> +	}
> +
> +	return 0;
> +
> +err:
> +	trace_xfs_reflink_remap_blocks_error(dest, error, _RET_IP_);
> +	return error;
> +}
> +
> +/*
> + * Link a range of blocks from one file to another.
> + */
> +int
> +xfs_reflink_remap_range(
> +	struct xfs_inode	*src,
> +	xfs_off_t		srcoff,
> +	struct xfs_inode	*dest,
> +	xfs_off_t		destoff,
> +	xfs_off_t		len)
> +{
> +	struct xfs_mount	*mp = src->i_mount;
> +	xfs_fileoff_t		sfsbno, dfsbno;
> +	xfs_filblks_t		fsblen;
> +	int			error;
> +
> +	if (!xfs_sb_version_hasreflink(&mp->m_sb))
> +		return -EOPNOTSUPP;
> +
> +	if (XFS_FORCED_SHUTDOWN(mp))
> +		return -EIO;
> +
> +	/* Don't reflink realtime inodes */
> +	if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest))
> +		return -EINVAL;
> +
> +	trace_xfs_reflink_remap_range(src, srcoff, len, dest, destoff);
> +
> +	/* Lock both files against IO */
> +	if (src->i_ino == dest->i_ino) {
> +		xfs_ilock(src, XFS_IOLOCK_EXCL);
> +		xfs_ilock(src, XFS_MMAPLOCK_EXCL);
> +	} else {
> +		xfs_lock_two_inodes(src, dest, XFS_IOLOCK_EXCL);
> +		xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL);
> +	}
> +
> +	error = xfs_reflink_set_inode_flag(src, dest);
> +	if (error)
> +		goto out_error;
> +
> +	/*
> +	 * Invalidate the page cache so that we can clear any CoW mappings
> +	 * in the destination file.
> +	 */
> +	truncate_inode_pages_range(&VFS_I(dest)->i_data, destoff,
> +				   PAGE_ALIGN(destoff + len) - 1);
> +
> +	dfsbno = XFS_B_TO_FSBT(mp, destoff);
> +	sfsbno = XFS_B_TO_FSBT(mp, srcoff);
> +	fsblen = XFS_B_TO_FSB(mp, len);
> +	error = xfs_reflink_remap_blocks(src, sfsbno, dest, dfsbno, fsblen,
> +			destoff + len);
> +	if (error)
> +		goto out_error;
> +
> +	error = xfs_reflink_update_dest(dest, destoff + len);
> +	if (error)
> +		goto out_error;
> +
> +out_error:
> +	xfs_iunlock(src, XFS_MMAPLOCK_EXCL);
> +	xfs_iunlock(src, XFS_IOLOCK_EXCL);
> +	if (src->i_ino != dest->i_ino) {
> +		xfs_iunlock(dest, XFS_MMAPLOCK_EXCL);
> +		xfs_iunlock(dest, XFS_IOLOCK_EXCL);
> +	}
> +	if (error)
> +		trace_xfs_reflink_remap_range_error(dest, error, _RET_IP_);
> +	return error;
> +}
> diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
> index 1d2f180..c35ce29 100644
> --- a/fs/xfs/xfs_reflink.h
> +++ b/fs/xfs/xfs_reflink.h
> @@ -43,5 +43,7 @@ extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset,
>  extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
>  		xfs_off_t count);
>  extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
> +extern int xfs_reflink_remap_range(struct xfs_inode *src, xfs_off_t srcoff,
> +		struct xfs_inode *dest, xfs_off_t destoff, xfs_off_t len);
>  
>  #endif /* __XFS_REFLINK_H */
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html