[PATCH 2/3] xfs: Add support IOC_MOV_DATA ioctl

Namjae Jeon <namjae.jeon@xxxxxxxxxxx> · Tue, 08 Jul 2014 20:59:49 +0900

This patch implements fs ioctl's IOC_MOV_DATA for XFS.

The semantics of this ioctl are:
1) Like collapse range, offsets and length should be file system block size
   aligned.
2) In the receiver file, a hole of at least length bytes should be present at
   receiver_offset.
3) It does not change the file size of either the donor or the receiver file.
4) It leaves a hole at the place from where blocks are moved out in donor file.
5) Both (donor_offset + length) and (receiver_offset + length) should be within
   size of donor file and receiver file respectively.
   Only unwritten extents reside beyond the file size, and it does not make
   sense to transfer unwritten extents, let alone the security issues it may raise.
6) If the range to be transferred from the donor file contains any holes, they
   are replicated as they are in the receiver file. This means holes are preserved
   and the length of each hole is added to moved_len, signifying that the hole
   range was successfully transferred.

Signed-off-by: Namjae Jeon <namjae.jeon@xxxxxxxxxxx>
Signed-off-by: Ashish Sangwan <a.sangwan@xxxxxxxxxxx>
---
 fs/xfs/libxfs/xfs_bmap.c | 148 ++++++++++++++++++++++++++++
 fs/xfs/libxfs/xfs_bmap.h |   4 +
 fs/xfs/xfs_bmap_util.c   | 251 +++++++++++++++++++++++++++++++++++++++++++++++
 fs/xfs/xfs_bmap_util.h   |   3 +
 fs/xfs/xfs_iops.c        |   1 +
 5 files changed, 407 insertions(+)

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 3dee150..c2ae99e 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -5982,3 +5982,151 @@ del_cursor:
 	xfs_trans_log_inode(tp, ip, logflags);
 	return error;
 }
+/*
+ * Move one extent record from the data fork of donor inode @dip to the data
+ * fork of receiver inode @rip.
+ *
+ * @got is the in-core copy of the donor extent and @donor_ext is its index in
+ * the donor's extent list; the extent is inserted at @roffset_fsb in @rip.
+ * The caller must guarantee a hole of at least got->br_blockcount blocks at
+ * @roffset_fsb.  Both inodes are logged in @tp on every exit taken after the
+ * initial sanity checks.  Returns 0 on success or an error code.
+ */
+int xfs_move_extent(
+	struct xfs_trans	*tp,
+	xfs_inode_t		*dip,
+	xfs_inode_t		*rip,
+	xfs_extnum_t		donor_ext,
+	struct xfs_bmbt_irec	*got,
+	xfs_fileoff_t		roffset_fsb,
+	xfs_fsblock_t		*dfirstblock,
+	struct xfs_bmap_free	*dflist,
+	xfs_fsblock_t		*rfirstblock,
+	struct xfs_bmap_free	*rflist)
+{
+	struct xfs_mount		*mp = dip->i_mount;
+	struct xfs_btree_cur		*dcur;
+	struct xfs_btree_cur		*rcur;
+	xfs_ifork_t			*difp;
+	xfs_ifork_t			*rifp;
+	xfs_extnum_t			idx;
+	int				i;
+	int				dlogflags;
+	int				rlogflags;
+	int				tmp_logflags;
+	int				error;
+	int				whichfork = XFS_DATA_FORK;
+	struct xfs_bmalloca		bma = { NULL };
+
+	if (unlikely(XFS_TEST_ERROR(
+	    (XFS_IFORK_FORMAT(dip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
+	     XFS_IFORK_FORMAT(dip, whichfork) != XFS_DINODE_FMT_BTREE),
+	     mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+		XFS_ERROR_REPORT("xfs_move_extent", XFS_ERRLEVEL_LOW, mp);
+		return -EFSCORRUPTED;
+	}
+
+	if (unlikely(XFS_TEST_ERROR(
+	    (XFS_IFORK_FORMAT(rip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
+	     XFS_IFORK_FORMAT(rip, whichfork) != XFS_DINODE_FMT_BTREE),
+	     mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+		XFS_ERROR_REPORT("xfs_move_extent", XFS_ERRLEVEL_LOW, mp);
+		return -EFSCORRUPTED;
+	}
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return -EIO;
+
+	dlogflags = XFS_ILOG_CORE;
+	difp = XFS_IFORK_PTR(dip, whichfork);
+
+	if (difp->if_flags & XFS_IFBROOT) {
+		dcur = xfs_bmbt_init_cursor(mp, tp, dip, whichfork);
+		dcur->bc_private.b.firstblock = *dfirstblock;
+		dcur->bc_private.b.flist = dflist;
+		dcur->bc_private.b.flags = 0;
+	} else {
+		dcur = NULL;
+		dlogflags |= XFS_ILOG_DEXT;
+	}
+
+	rlogflags = XFS_ILOG_CORE;
+	rifp = XFS_IFORK_PTR(rip, whichfork);
+
+	if (rifp->if_flags & XFS_IFBROOT) {
+		rcur = xfs_bmbt_init_cursor(mp, tp, rip, whichfork);
+		rcur->bc_private.b.firstblock = *rfirstblock;
+		rcur->bc_private.b.flist = rflist;
+		rcur->bc_private.b.flags = 0;
+	} else {
+		rcur = NULL;
+		rlogflags |= XFS_ILOG_DEXT;
+	}
+
+	if (dcur) {
+		error = xfs_bmbt_lookup_eq(dcur, got->br_startoff,
+					   got->br_startblock,
+					   got->br_blockcount, &i);
+		if (error)
+			goto del_cursor;
+		XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
+	}
+
+	/* Only the insertion index is needed here, not the record itself. */
+	xfs_iext_bno_to_ext(rifp, roffset_fsb, &idx);
+
+	/* Describe the extent as it will appear in the receiver. */
+	bma.tp = tp;
+	bma.ip = rip;
+	bma.flist = rflist;
+	bma.firstblock = rfirstblock;
+	bma.cur = rcur;
+	bma.got.br_startoff = roffset_fsb;
+	bma.got.br_startblock = got->br_startblock;
+	bma.got.br_blockcount = got->br_blockcount;
+	bma.got.br_state = got->br_state;
+	bma.idx = idx;
+
+	error = xfs_bmap_add_extent_hole_real(&bma, whichfork);
+	/* The insert may create a btree cursor in bma.cur; adopt it. */
+	rcur = bma.cur;
+	if (error)
+		goto del_cursor;
+	rlogflags |= bma.logflags;
+	rip->i_d.di_nblocks += got->br_blockcount;
+
+	xfs_iext_remove(dip, donor_ext, 1, 0);
+	if (dcur) {
+		error = xfs_btree_delete(dcur, &i);
+		if (error)
+			goto del_cursor;
+		XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
+	}
+	XFS_IFORK_NEXT_SET(dip, whichfork,
+			   XFS_IFORK_NEXTENTS(dip, whichfork) - 1);
+	dip->i_d.di_nblocks -= got->br_blockcount;
+
+	if (xfs_bmap_wants_extents(dip, whichfork)) {
+		ASSERT(dcur != NULL);
+		error = xfs_bmap_btree_to_extents(tp, dip, dcur, &tmp_logflags,
+				whichfork);
+		dlogflags |= tmp_logflags;
+	}
+
+del_cursor:
+	if (dcur) {
+		dcur->bc_private.b.allocated = 0;
+		xfs_btree_del_cursor(dcur,
+				error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+	}
+	xfs_trans_log_inode(tp, dip, dlogflags);
+
+	if (rcur) {
+		rcur->bc_private.b.allocated = 0;
+		xfs_btree_del_cursor(rcur,
+				error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+	}
+	xfs_trans_log_inode(tp, rip, rlogflags);
+
+	return error;
+}
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index af05899..ebfe584 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -191,5 +191,9 @@ int	xfs_bmap_shift_extents_right(struct xfs_trans *tp, struct xfs_inode *ip,
 		xfs_extnum_t *current_ext, xfs_extnum_t end_ext,
 		xfs_fsblock_t *firstblock, struct xfs_bmap_free *flist,
 		int num_exts);
+int	xfs_move_extent(struct xfs_trans *, xfs_inode_t *, xfs_inode_t *,
+		xfs_extnum_t, struct xfs_bmbt_irec *, xfs_fileoff_t,
+		xfs_fsblock_t *, struct xfs_bmap_free *, xfs_fsblock_t *,
+		struct xfs_bmap_free *);
 
 #endif	/* __XFS_BMAP_H__ */
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 3c05843..c004b25 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -2071,3 +2071,254 @@ out_trans_cancel:
 	xfs_trans_cancel(tp, 0);
 	goto out_unlock;
 }
+
+/*
+ * Compute the size of the hole, if any, that starts at @offset_fsb in the data
+ * fork of @ip and return it in @count, in filesystem blocks.  The hole extends
+ * to the next allocated extent or to i_size, whichever comes first.  @count is
+ * 0 when @offset_fsb is already mapped.  @offset_fsb must lie below i_size.
+ * Returns 0, or the error from reading the extent list in.
+ */
+int
+xfs_compute_hole_size(
+	xfs_inode_t		*ip,
+	xfs_fileoff_t		offset_fsb,
+	struct xfs_trans	*tp,
+	xfs_fileoff_t		*count)
+{
+	struct xfs_ifork		*ifp;
+	struct xfs_bmbt_rec_host	*gotp;
+	struct xfs_bmbt_irec		got;
+	xfs_extnum_t			current_ext;
+	xfs_fileoff_t			isize_fsb;
+	int				error = 0;
+
+	*count = 0;
+	isize_fsb = XFS_B_TO_FSB(ip->i_mount, VFS_I(ip)->i_size);
+	ASSERT(isize_fsb > offset_fsb);
+
+	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+		error = xfs_iread_extents(tp, ip, XFS_DATA_FORK);
+		if (error)
+			return error;
+	}
+
+	gotp = xfs_iext_bno_to_ext(ifp, offset_fsb, &current_ext);
+	if (gotp) {
+		xfs_bmbt_get_all(gotp, &got);
+		/* An extent at or before offset_fsb covers it: no hole. */
+		if (got.br_startoff > offset_fsb)
+			*count = (isize_fsb > got.br_startoff) ?
+				 (got.br_startoff - offset_fsb) :
+				 (isize_fsb - offset_fsb);
+	} else
+		*count = isize_fsb - offset_fsb;
+
+	return error;
+}
+
+/*
+ * Move the blocks backing [doffset, doffset + len) of @donor into the hole at
+ * [roffset, roffset + len) of @receiver, one extent per transaction.  Donor
+ * holes are skipped and counted as moved.  @moved_len receives the number of
+ * bytes handled on every exit taken after the inodes are locked.  Returns 0 or
+ * a negative errno; errors are kept negative throughout, never sign-flipped.
+ */
+int
+xfs_vn_move_data(
+	struct inode	*donor,
+	struct inode	*receiver,
+	loff_t		doffset,
+	loff_t		roffset,
+	loff_t		len,
+	loff_t		*moved_len)
+{
+	struct xfs_mount		*mp;
+	struct xfs_ifork		*ifp;
+	struct xfs_trans		*tp;
+	struct xfs_bmbt_rec_host	*gotp;
+	struct xfs_bmbt_irec		got;
+	struct xfs_bmap_free		dfree_list;
+	struct xfs_bmap_free		rfree_list;
+	xfs_inode_t			*dip;
+	xfs_inode_t			*rip;
+	xfs_extnum_t			split_ext;
+	xfs_extnum_t			dcurrent_ext = 0;
+	xfs_fsblock_t			dfirstblock;
+	xfs_fsblock_t			rfirstblock;
+	xfs_fileoff_t			dcurrent_fsb;
+	xfs_fileoff_t			rcurrent_fsb;
+	xfs_fileoff_t			len_fsb;
+	xfs_fileoff_t			moved_blocks = 0;
+	xfs_fileoff_t			hole_blkcnt = 0;
+	xfs_off_t			pg_start;
+	int				committed;
+	int				error;
+	unsigned			mask = (1 << donor->i_blkbits) - 1;
+
+	dip = XFS_I(donor);
+	rip = XFS_I(receiver);
+	mp = dip->i_mount;
+
+	if (XFS_FORCED_SHUTDOWN(dip->i_mount))
+		return -EIO;
+
+	xfs_lock_two_inodes(dip, rip, XFS_IOLOCK_EXCL);
+
+	/* The range must lie inside both files and be block aligned. */
+	if (doffset + len > donor->i_size || roffset + len > receiver->i_size ||
+	    ((doffset | roffset | len) & mask)) {
+		error = -EINVAL;
+		goto out;
+	}
+
+	dcurrent_fsb = XFS_B_TO_FSB(mp, doffset);
+	rcurrent_fsb = XFS_B_TO_FSB(mp, roffset);
+	len_fsb = XFS_B_TO_FSB(mp, len);
+
+	pg_start = round_down(doffset, PAGE_SIZE);
+	error = filemap_write_and_wait_range(donor->i_mapping, pg_start, -1);
+	if (error)
+		goto out;
+	truncate_pagecache_range(donor, pg_start, -1);
+
+	pg_start = round_down(roffset, PAGE_SIZE);
+	error = filemap_write_and_wait_range(receiver->i_mapping,
+					     pg_start, -1);
+	if (error)
+		goto out;
+	truncate_pagecache_range(receiver, pg_start, -1);
+
+	error = xfs_qm_dqattach(dip, 0);
+	if (error)
+		goto out;
+
+	error = xfs_qm_dqattach(rip, 0);
+	if (error)
+		goto out;
+
+	error = xfs_bmap_split_extent(dip, dcurrent_fsb, &split_ext);
+	if (error)
+		goto out;
+
+	error = xfs_bmap_split_extent(dip, dcurrent_fsb + len_fsb, &split_ext);
+	if (error)
+		goto out;
+
+	ifp = XFS_IFORK_PTR(dip, XFS_DATA_FORK);
+
+	while (moved_blocks < len_fsb && !error) {
+		gotp = xfs_iext_bno_to_ext(ifp, dcurrent_fsb, &dcurrent_ext);
+		if (!gotp) {
+			/* No more data blocks left in donor */
+			moved_blocks = len_fsb;
+			break;
+		}
+
+		xfs_bmbt_get_all(gotp, &got);
+		if (dcurrent_fsb != got.br_startoff) {
+			if (dcurrent_fsb > got.br_startoff) {
+				error = -EFSCORRUPTED;
+				break;
+			}
+			hole_blkcnt = got.br_startoff - dcurrent_fsb;
+			dcurrent_fsb += hole_blkcnt;
+			rcurrent_fsb += hole_blkcnt;
+			moved_blocks += hole_blkcnt;
+			if (moved_blocks >= len_fsb) {
+				moved_blocks = len_fsb;
+				break;
+			}
+		}
+
+		tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
+
+		/*
+		 * We would need to reserve permanent block for transaction.
+		 * This will come into picture when after shifting extent into
+		 * hole we found that adjacent extents can be merged which
+		 * may lead to freeing of a block during record update.
+		 */
+		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
+					  XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
+		if (error) {
+			xfs_trans_cancel(tp, 0);
+			break;
+		}
+
+		xfs_lock_two_inodes(dip, rip, XFS_ILOCK_EXCL);
+
+		error = xfs_trans_reserve_quota(tp, mp, dip->i_udquot,
+				dip->i_gdquot, dip->i_pdquot,
+				XFS_DIOSTRAT_SPACE_RES(mp, 0), 0,
+				XFS_QMOPT_RES_REGBLKS);
+		if (error)
+			goto error2;
+
+		error = xfs_trans_reserve_quota(tp, mp, rip->i_udquot,
+				rip->i_gdquot, rip->i_pdquot,
+				XFS_DIOSTRAT_SPACE_RES(mp, 0), 0,
+				XFS_QMOPT_RES_REGBLKS);
+		if (error)
+			goto error2;
+
+		xfs_trans_ijoin(tp, dip, 0);
+		xfs_trans_ijoin(tp, rip, 0);
+
+		xfs_bmap_init(&dfree_list, &dfirstblock);
+		xfs_bmap_init(&rfree_list, &rfirstblock);
+
+		/* compute the hole size in first iteration */
+		if (moved_blocks == hole_blkcnt) {
+			error = xfs_compute_hole_size(rip, rcurrent_fsb,
+						      tp, &hole_blkcnt);
+			if (error)
+				goto error0;
+			if (hole_blkcnt < len_fsb) {
+				error = -EINVAL;
+				goto error0;
+			}
+		}
+
+		error = xfs_move_extent(tp, dip, rip, dcurrent_ext, &got,
+					rcurrent_fsb, &dfirstblock,
+					&dfree_list, &rfirstblock, &rfree_list);
+		if (error)
+			goto error0;
+
+		error = xfs_bmap_finish(&tp, &dfree_list, &committed);
+		if (error)
+			goto error0;
+
+		error = xfs_bmap_finish(&tp, &rfree_list, &committed);
+		if (error)
+			goto error1;
+
+		error = xfs_trans_commit(tp, 0);
+
+		if (!error) {
+			dcurrent_fsb += got.br_blockcount;
+			moved_blocks += got.br_blockcount;
+			rcurrent_fsb += got.br_blockcount;
+		}
+
+		xfs_iunlock(dip, XFS_ILOCK_EXCL);
+		xfs_iunlock(rip, XFS_ILOCK_EXCL);
+	}
+out:
+	*moved_len = moved_blocks << donor->i_blkbits;
+	xfs_iunlock(dip, XFS_IOLOCK_EXCL);
+	xfs_iunlock(rip, XFS_IOLOCK_EXCL);
+	return error;
+
+error0:
+	xfs_bmap_cancel(&dfree_list);
+error1:
+	xfs_bmap_cancel(&rfree_list);
+error2:
+	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
+	xfs_iunlock(dip, XFS_ILOCK_EXCL);
+	xfs_iunlock(rip, XFS_ILOCK_EXCL);
+	goto out;
+}
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 9a18a4b..b2ae123 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -104,6 +104,9 @@ int	xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset,
 				xfs_off_t len);
 int	xfs_insert_file_space(struct xfs_inode *, xfs_off_t offset,
 				xfs_off_t len);
+int	xfs_vn_move_data(struct inode *donor, struct inode *receiver,
+			 loff_t doffset, loff_t roffset, loff_t len,
+			 loff_t *moved_len);
 
 /* EOF block manipulation functions */
 bool	xfs_can_free_eofblocks(struct xfs_inode *ip, bool force);
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index d75621a..63c1621 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -1100,6 +1100,7 @@ static const struct inode_operations xfs_inode_operations = {
 	.listxattr		= xfs_vn_listxattr,
 	.fiemap			= xfs_vn_fiemap,
 	.update_time		= xfs_vn_update_time,
+	.mov_data		= xfs_vn_move_data,
 };
 
 static const struct inode_operations xfs_dir_inode_operations = {
-- 
1.7.11-rc0

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html