[PATCH 12/14] xfs: support XFS_XFLAG_REFLINK (and FS_NOCOW_FL) on reflink filesystems

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Report the reflink/nocow flags as appropriate in the XFS-specific and
"standard" getattr ioctls.  For now we'll implicitly report all reflink
files as also being nodefrag, to prevent the defragger from corrupting
the extent maps.

Allow the user to clear the reflink flag (or set the nocow flag), which
will try to remap all shared blocks to private blocks on disk.  If this
succeeds, the file will become a non-reflinked file.

Transfer the reflink flag between inodes when swapping extents, and
quietly ignore attempts to set the reflink flag, so that xfs_fsr can
defragment reflinked files (albeit by breaking the reflink...) unless
of course nodefrag is set.

Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
---
 fs/xfs/libxfs/xfs_fs.h |    1 
 fs/xfs/xfs_bmap_util.c |    5 +
 fs/xfs/xfs_inode.c     |    2 
 fs/xfs/xfs_ioctl.c     |   42 +++++-
 fs/xfs/xfs_reflink.c   |  321 ++++++++++++++++++++++++++++++++++++++++++++++++
 fs/xfs/xfs_reflink.h   |   10 +
 6 files changed, 374 insertions(+), 7 deletions(-)


diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 7f4d886..6b1b71c 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -67,6 +67,7 @@ struct fsxattr {
 #define XFS_XFLAG_EXTSZINHERIT	0x00001000	/* inherit inode extent size */
 #define XFS_XFLAG_NODEFRAG	0x00002000  	/* do not defragment */
 #define XFS_XFLAG_FILESTREAM	0x00004000	/* use filestream allocator */
+#define XFS_XFLAG_REFLINK	0x00008000	/* file is reflinked */
 #define XFS_XFLAG_HASATTR	0x80000000	/* no DIFLAG for this	*/
 
 /*
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index be010c9..e5b4752 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1920,6 +1920,11 @@ xfs_swap_extents(
 		break;
 	}
 
+	if (ip->i_d.di_flags & XFS_DIFLAG_REFLINK) {
+		tip->i_d.di_flags |= XFS_DIFLAG_REFLINK;
+		ip->i_d.di_flags &= ~XFS_DIFLAG_REFLINK;
+	}
+
 	xfs_trans_log_inode(tp, ip,  src_log_flags);
 	xfs_trans_log_inode(tp, tip, target_log_flags);
 
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index e688732..4aa51f4 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -592,6 +592,8 @@ _xfs_dic2xflags(
 			flags |= XFS_XFLAG_NODEFRAG;
 		if (di_flags & XFS_DIFLAG_FILESTREAM)
 			flags |= XFS_XFLAG_FILESTREAM;
+		if (di_flags & XFS_DIFLAG_REFLINK)
+			flags |= XFS_XFLAG_REFLINK;
 	}
 
 	return flags;
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index da4d7b7..5a9c161 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -870,6 +870,10 @@ xfs_merge_ioc_xflags(
 		xflags |= XFS_XFLAG_NODUMP;
 	else
 		xflags &= ~XFS_XFLAG_NODUMP;
+	if (flags & FS_NOCOW_FL)
+		xflags &= ~XFS_XFLAG_REFLINK;
+	else
+		xflags |= XFS_XFLAG_REFLINK;
 
 	return xflags;
 }
@@ -939,7 +943,8 @@ xfs_set_diflags(
 	unsigned int		di_flags;
 
 	/* can't set PREALLOC this way, just preserve it */
-	di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
+	di_flags = (ip->i_d.di_flags &
+			(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_REFLINK));
 	if (xflags & XFS_XFLAG_IMMUTABLE)
 		di_flags |= XFS_DIFLAG_IMMUTABLE;
 	if (xflags & XFS_XFLAG_APPEND)
@@ -1002,9 +1007,11 @@ static int
 xfs_ioctl_setattr_xflags(
 	struct xfs_trans	*tp,
 	struct xfs_inode	*ip,
-	struct fsxattr		*fa)
+	struct fsxattr		*fa,
+	struct file		*filp)
 {
 	struct xfs_mount	*mp = ip->i_mount;
+	int			error;
 
 	/* Can't change realtime flag if any extents are allocated. */
 	if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
@@ -1028,6 +1035,9 @@ xfs_ioctl_setattr_xflags(
 		return -EPERM;
 
 	xfs_set_diflags(ip, fa->fsx_xflags);
+	error = xfs_reflink_end_unshare(ip, fa->fsx_xflags);
+	if (error)
+		return error;
 	xfs_diflags_to_linux(ip);
 	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
@@ -1170,7 +1180,8 @@ xfs_ioctl_setattr_check_projid(
 STATIC int
 xfs_ioctl_setattr(
 	xfs_inode_t		*ip,
-	struct fsxattr		*fa)
+	struct fsxattr		*fa,
+	struct file		*filp)
 {
 	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_trans	*tp;
@@ -1181,6 +1192,10 @@ xfs_ioctl_setattr(
 
 	trace_xfs_ioctl_setattr(ip);
 
+	code = xfs_reflink_check_flag_adjust(ip, &fa->fsx_xflags);
+	if (code)
+		return code;
+
 	code = xfs_ioctl_setattr_check_projid(ip, fa);
 	if (code)
 		return code;
@@ -1201,6 +1216,10 @@ xfs_ioctl_setattr(
 			return code;
 	}
 
+	code = xfs_reflink_start_unshare(ip, fa->fsx_xflags, filp);
+	if (code)
+		return code;
+
 	tp = xfs_ioctl_setattr_get_trans(ip);
 	if (IS_ERR(tp)) {
 		code = PTR_ERR(tp);
@@ -1220,7 +1239,7 @@ xfs_ioctl_setattr(
 	if (code)
 		goto error_trans_cancel;
 
-	code = xfs_ioctl_setattr_xflags(tp, ip, fa);
+	code = xfs_ioctl_setattr_xflags(tp, ip, fa, filp);
 	if (code)
 		goto error_trans_cancel;
 
@@ -1290,7 +1309,7 @@ xfs_ioc_fssetxattr(
 	error = mnt_want_write_file(filp);
 	if (error)
 		return error;
-	error = xfs_ioctl_setattr(ip, &fa);
+	error = xfs_ioctl_setattr(ip, &fa, filp);
 	mnt_drop_write_file(filp);
 	return error;
 }
@@ -1303,6 +1322,7 @@ xfs_ioc_getxflags(
 	unsigned int		flags;
 
 	flags = xfs_di2lxflags(ip->i_d.di_flags);
+	xfs_reflink_get_lxflags(ip, &flags);
 	if (copy_to_user(arg, &flags, sizeof(flags)))
 		return -EFAULT;
 	return 0;
@@ -1324,22 +1344,30 @@ xfs_ioc_setxflags(
 
 	if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
 		      FS_NOATIME_FL | FS_NODUMP_FL | \
-		      FS_SYNC_FL))
+		      FS_SYNC_FL | FS_NOCOW_FL))
 		return -EOPNOTSUPP;
 
 	fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip));
 
+	error = xfs_reflink_check_flag_adjust(ip, &fa.fsx_xflags);
+	if (error)
+		return error;
+
 	error = mnt_want_write_file(filp);
 	if (error)
 		return error;
 
+	error = xfs_reflink_start_unshare(ip, fa.fsx_xflags, filp);
+	if (error)
+		return error;
+
 	tp = xfs_ioctl_setattr_get_trans(ip);
 	if (IS_ERR(tp)) {
 		error = PTR_ERR(tp);
 		goto out_drop_write;
 	}
 
-	error = xfs_ioctl_setattr_xflags(tp, ip, &fa);
+	error = xfs_ioctl_setattr_xflags(tp, ip, &fa, filp);
 	if (error) {
 		xfs_trans_cancel(tp);
 		goto out_drop_write;
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 325dd14..23ce9fc 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1008,3 +1008,324 @@ out:
 	xfs_trans_cancel(tp);
 	return error;
 }
+
+/**
+ * xfs_reflink_get_lxflags() - report FS_NOCOW_FL for non-reflinked inodes
+ *
+ * @ip: XFS inode whose on-disk flags are examined
+ * @flags: user-visible inode flags; FS_NOCOW_FL may be OR'd in, never cleared
+ */
+void
+xfs_reflink_get_lxflags(
+	struct xfs_inode	*ip,		/* XFS inode */
+	unsigned int		*flags)		/* user flags */
+{
+	/*
+	 * Only reflink-capable filesystems report anything here.  On those,
+	 * an inode whose reflink flag is clear is shown to userspace as a
+	 * "nocow" file; every other case leaves *flags untouched.
+	 */
+	if (xfs_sb_version_hasreflink(&ip->i_mount->m_sb) &&
+	    !(ip->i_d.di_flags & XFS_DIFLAG_REFLINK))
+		*flags |= FS_NOCOW_FL;
+}
+
+
+/**
+ * xfs_reflink_dirty_range() -- Dirty all the shared blocks in the file so that
+ * they're rewritten elsewhere.  Similar to generic_perform_write().
+ *
+ * @filp: VFS file pointer
+ * @pos: offset to start dirtying
+ * @len: number of bytes to dirty; if <= 0 nothing is done and 0 is returned
+ */
+STATIC int
+xfs_reflink_dirty_range(
+	struct file		*filp,
+	xfs_off_t		pos,
+	xfs_off_t		len)
+{
+	struct address_space	*mapping;
+	const struct address_space_operations *a_ops;
+	int			error = 0;
+	unsigned int		flags;
+	struct page		*page;
+	struct page		*rpage;
+	unsigned long		offset;	/* Offset into pagecache page */
+	unsigned long		bytes;	/* Bytes to write to page */
+	void			*fsdata;
+
+	mapping = filp->f_mapping;
+	a_ops = mapping->a_ops;
+	flags = AOP_FLAG_UNINTERRUPTIBLE;
+	while (len > 0) {
+		/* Each pass covers at most the remainder of one page. */
+		offset = (pos & (PAGE_CACHE_SIZE - 1));
+		bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset, len);
+		rpage = xfs_get_page(filp->f_inode, pos);
+		if (IS_ERR(rpage)) {
+			error = PTR_ERR(rpage);
+			break;
+		} else if (!rpage) {
+			error = -ENOMEM;
+			break;
+		}
+		/* rpage's reference is dropped once write_begin() has run. */
+		error = a_ops->write_begin(filp, mapping, pos, bytes, flags,
+					   &page, &fsdata);
+		page_cache_release(rpage);
+		if (error < 0)
+			break;
+
+		if (!PageUptodate(page))
+			printk(KERN_ERR "%s: STALE? ino=%lu pos=%llu\n", __func__, filp->f_inode->i_ino, pos);
+		if (mapping_writably_mapped(mapping))
+			flush_dcache_page(page);
+		/* A positive write_end() result is the byte count to advance. */
+		error = a_ops->write_end(filp, mapping, pos, bytes, bytes,
+					 page, fsdata);
+		if (error < 0)
+			break;
+		else if (error == 0) {
+			error = -EIO;
+			break;
+		} else {
+			bytes = error;
+			error = 0;
+		}
+
+		cond_resched();
+
+		pos += bytes;
+		len -= bytes;
+
+		balance_dirty_pages_ratelimited(mapping);
+		if (fatal_signal_pending(current)) {
+			error = -EINTR;
+			break;
+		}
+	}
+	/* 0 if the whole range was dirtied, else the first negative errno. */
+	return error;
+}
+
+/**
+ * xfs_reflink_check_flag_adjust() - vet a requested change to the reflink flag
+ *
+ * @ip: XFS inode
+ * @xflags: requested XFS_XFLAG_* flags; a request to set reflink is dropped
+ *
+ * Returns 0, or -EOPNOTSUPP if the flag would change on a non-reflink fs.
+ */
+int						/* error */
+xfs_reflink_check_flag_adjust(
+	struct xfs_inode	*ip,		/* XFS inode */
+	unsigned int		*xflags)		/* in-core flags */
+{
+	unsigned int		want = *xflags & XFS_XFLAG_REFLINK;
+	unsigned int		have = ip->i_d.di_flags & XFS_DIFLAG_REFLINK;
+
+	compiletime_assert(XFS_XFLAG_REFLINK == XFS_DIFLAG_REFLINK,
+			"in-core and on-disk inode reflink flags must match");
+
+	/* Asking for the state the inode already has is always fine. */
+	if (want == have)
+		return 0;
+	if (!xfs_sb_version_hasreflink(&ip->i_mount->m_sb))
+		return -EOPNOTSUPP;
+
+	/* Setting is quietly ignored; when clearing, this is a no-op. */
+	*xflags &= ~XFS_XFLAG_REFLINK;
+	return 0;
+}
+
+/**
+ * xfs_reflink_start_unshare() - dirty all the shared blocks so that they can
+ * be reallocated elsewhere, in preparation for clearing the reflink hint.
+ * NOTE(review): when this fails, xfs_ioc_setxflags() returns without calling
+ * mnt_drop_write_file(); it probably ought to goto out_drop_write instead.
+ * @ip: XFS inode
+ * @xflags: requested XFS_XFLAG_* flags; a no-op unless reflink is being cleared
+ * @filp: VFS file structure whose mapping is dirtied and written back
+ */
+int						/* error */
+xfs_reflink_start_unshare(
+	struct xfs_inode	*ip,		/* XFS inode */
+	unsigned int		xflags,		/* in-core flags */
+	struct file		*filp)		/* VFS file structure */
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	int			error = 0;
+	xfs_fileoff_t		fbno;
+	xfs_filblks_t		end;
+	xfs_agnumber_t		agno;
+	xfs_agblock_t		agbno;
+	xfs_extlen_t		len;
+	xfs_nlink_t		nr;
+	xfs_off_t		isize;
+	xfs_off_t		fpos;
+	xfs_off_t		flen;
+	struct xfs_bmbt_irec	map[2];
+	int			nmaps;
+
+	if (!xfs_sb_version_hasreflink(&ip->i_mount->m_sb) ||
+	    (xflags & XFS_XFLAG_REFLINK) ||
+	    !(ip->i_d.di_flags & XFS_DIFLAG_REFLINK))
+		return 0;
+
+	inode_dio_wait(VFS_I(ip));
+
+	/*
+	 * The user wants to preemptively CoW all shared blocks in this file,
+	 * which enables us to turn off the reflink flag.  Iterate all
+	 * extents which are not prealloc/delalloc to see which ranges are
+	 * mentioned in the refcount tree, then read those blocks into the
+	 * pagecache, dirty them, fsync them back out, and then we can update
+	 * the inode flag.  What happens if we run out of memory? :)
+	 */
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	fbno = 0;
+	isize = i_size_read(VFS_I(ip));
+	if (isize == 0) {
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		return 0;
+	}
+	end = XFS_B_TO_FSB(mp, isize);
+	while (end - fbno > 0) {
+		nmaps = 1;
+		/*
+		 * Look for extents in the file.  Skip holes, delalloc, or
+		 * unwritten extents; they can't be reflinked.
+		 */
+		error = xfs_bmapi_read(ip, fbno, end - fbno, map, &nmaps, 0);
+		if (error)
+			goto out_unlock;
+		if (nmaps == 0)
+			break;
+		if (map[0].br_startblock == HOLESTARTBLOCK ||
+		    map[0].br_startblock == DELAYSTARTBLOCK ||
+		    map[0].br_state == XFS_EXT_UNWRITTEN)
+			goto next;
+		/* map[1] is a cursor over map[0], advanced by len each pass. */
+		map[1] = map[0];
+		while (map[1].br_blockcount) {
+			agno = XFS_FSB_TO_AGNO(mp, map[1].br_startblock);
+			agbno = XFS_FSB_TO_AGBNO(mp, map[1].br_startblock);
+			CHECK_AG_NUMBER(mp, agno);
+			CHECK_AG_EXTENT(mp, agbno, 1);
+
+			error = xfs_reflink_get_refcount(mp, agno, agbno,
+							 &len, &nr);
+			if (error)
+				goto out_unlock;
+			XFS_WANT_CORRUPTED_GOTO(mp, len != 0, out_unlock);
+			if (len > map[1].br_blockcount)
+				len = map[1].br_blockcount;
+			if (nr < 2)
+				goto skip_copy;
+			xfs_iunlock(ip, XFS_ILOCK_EXCL);
+			fpos = XFS_FSB_TO_B(mp, map[1].br_startoff);
+			flen = XFS_FSB_TO_B(mp, len);
+			if (fpos + flen > isize)
+				flen = isize - fpos;
+			error = xfs_reflink_dirty_range(filp, fpos, flen);
+			xfs_ilock(ip, XFS_ILOCK_EXCL);
+			if (error)
+				goto out_unlock;
+skip_copy:
+			map[1].br_blockcount -= len;
+			map[1].br_startoff += len;
+			map[1].br_startblock += len;
+		}
+
+next:
+		fbno = map[0].br_startoff + map[0].br_blockcount;
+	}
+	/* Success ends up here as well; only then is the mapping written back. */
+out_unlock:
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	if (error == 0)
+		error = filemap_write_and_wait(filp->f_mapping);
+	return error;
+}
+
+/**
+ * xfs_reflink_end_unshare() - finish removing reflink flag from inode
+ *
+ * @ip: XFS inode
+ * @xflags: requested XFS_XFLAG_* flags; a no-op unless reflink is being cleared
+ */
+int						/* error */
+xfs_reflink_end_unshare(
+	struct xfs_inode	*ip,		/* XFS inode */
+	unsigned int		xflags)		/* in-core flags */
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	int			error = 0;	/* loop may not run at all */
+	xfs_fileoff_t		fbno;
+	xfs_filblks_t		end;
+	xfs_agnumber_t		agno;
+	xfs_agblock_t		agbno;
+	xfs_extlen_t		len;
+	xfs_nlink_t		nr;
+	struct xfs_bmbt_irec	map[2];
+	int			nmaps;
+
+	if (!xfs_sb_version_hasreflink(&ip->i_mount->m_sb) ||
+	    (xflags & XFS_XFLAG_REFLINK) ||
+	    !(ip->i_d.di_flags & XFS_DIFLAG_REFLINK))
+		return 0;
+
+	/*
+	 * Earlier we copied all the shared blocks in this file to new blocks.
+	 * However, we dropped the ilock before getting the transaction, so
+	 * fail with -EINTR if somebody wandered in and added more reflinks.
+	 */
+	fbno = 0;
+	end = XFS_B_TO_FSB(mp, i_size_read(VFS_I(ip)));
+	while (end - fbno > 0) {
+		nmaps = 1;
+		/*
+		 * Look for extents in the file.  Skip holes, delalloc, or
+		 * unwritten extents; they can't be reflinked.
+		 */
+		error = xfs_bmapi_read(ip, fbno, end - fbno, map, &nmaps, 0);
+		if (error)
+			goto out;
+		if (nmaps == 0)
+			break;
+		if (map[0].br_startblock == HOLESTARTBLOCK ||
+		    map[0].br_startblock == DELAYSTARTBLOCK ||
+		    map[0].br_state == XFS_EXT_UNWRITTEN)
+			goto next;
+
+		map[1] = map[0];
+		while (map[1].br_blockcount) {
+			agno = XFS_FSB_TO_AGNO(mp, map[1].br_startblock);
+			agbno = XFS_FSB_TO_AGBNO(mp, map[1].br_startblock);
+			CHECK_AG_NUMBER(mp, agno);
+			CHECK_AG_EXTENT(mp, agbno, 1);
+
+			error = xfs_reflink_get_refcount(mp, agno, agbno,
+							 &len, &nr);
+			if (error)
+				goto out;
+			XFS_WANT_CORRUPTED_GOTO(mp, len != 0, out);
+			if (len > map[1].br_blockcount)
+				len = map[1].br_blockcount;
+			if (nr > 1) {
+				error = -EINTR;
+				goto out;
+			}
+			map[1].br_blockcount -= len;
+			map[1].br_startblock += len;
+		}
+
+next:
+		fbno = map[0].br_startoff + map[0].br_blockcount;
+	}
+	/* No block had more than one owner, so the flag can be dropped. */
+	ip->i_d.di_flags &= ~XFS_DIFLAG_REFLINK;
+out:
+	return error;
+}
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index 7f9660d..6f1ecf8 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -50,4 +50,14 @@ extern int xfs_reflink_finish_fork_buf(xfs_mount_t  *mp, xfs_inode_t *ip,
 	xfs_buf_t *bp, xfs_fileoff_t fileoff, xfs_trans_t *tp,
 	int write_error);
 
+extern void xfs_reflink_get_lxflags(struct xfs_inode *ip, unsigned int *flags);
+
+extern int xfs_reflink_check_flag_adjust(struct xfs_inode *ip,
+	unsigned int *xflags);
+
+extern int xfs_reflink_start_unshare(struct xfs_inode *ip, unsigned int xflags,
+	struct file *filp);
+
+extern int xfs_reflink_end_unshare(struct xfs_inode *ip, unsigned int xflags);
+
 #endif /* __XFS_REFLINK_H */

_______________________________________________
xfs mailing list
xfs@xxxxxxxxxxx
http://oss.sgi.com/mailman/listinfo/xfs



[Index of Archives]     [Linux XFS Devel]     [Linux Filesystem Development]     [Filesystem Testing]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux