Report the reflink/nocow flags as appropriate in the XFS-specific and
"standard" getattr ioctls.

Allow the user to clear the reflink flag (or set the nocow flag), which will
try to remap all shared blocks to private blocks on disk. If this
succeeds, the file will become a non-reflinked file.

Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
---
 fs/xfs/libxfs/xfs_fs.h |    1
 fs/xfs/xfs_inode.c     |   10 +
 fs/xfs/xfs_ioctl.c     |   39 +++++-
 fs/xfs/xfs_reflink.c   |  334 ++++++++++++++++++++++++++++++++++++++++++++++++
 fs/xfs/xfs_reflink.h   |    7 +
 5 files changed, 382 insertions(+), 9 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 2951abb..d7541f7 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -67,6 +67,7 @@ struct fsxattr {
 #define XFS_XFLAG_EXTSZINHERIT	0x00001000	/* inherit inode extent size */
 #define XFS_XFLAG_NODEFRAG	0x00002000	/* do not defragment */
 #define XFS_XFLAG_FILESTREAM	0x00004000	/* use filestream allocator */
+#define XFS_XFLAG_REFLINK	0x00008000	/* file is reflinked */
 #define XFS_XFLAG_HASATTR	0x80000000	/* no DIFLAG for this */
 
 /*
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 1d97238..1d2d364 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -558,7 +558,8 @@ __xfs_iflock(
 
 STATIC uint
 _xfs_dic2xflags(
-	__uint16_t		di_flags)
+	__uint16_t		di_flags,
+	__uint64_t		di_flags2)
 {
 	uint			flags = 0;
 
@@ -591,6 +592,8 @@ _xfs_dic2xflags(
 			flags |= XFS_XFLAG_NODEFRAG;
 		if (di_flags & XFS_DIFLAG_FILESTREAM)
 			flags |= XFS_XFLAG_FILESTREAM;
+		if (di_flags2 & XFS_DIFLAG2_REFLINK)
+			flags |= XFS_XFLAG_REFLINK;
 	}
 
 	return flags;
@@ -602,7 +605,7 @@ xfs_ip2xflags(
 {
 	xfs_icdinode_t		*dic = &ip->i_d;
 
-	return _xfs_dic2xflags(dic->di_flags) |
+	return _xfs_dic2xflags(dic->di_flags, dic->di_flags2) |
 				(XFS_IFORK_Q(ip) ? XFS_XFLAG_HASATTR : 0);
 }
 
@@ -610,7 +613,8 @@ uint
 xfs_dic2xflags(
 	xfs_dinode_t		*dip)
 {
-	return _xfs_dic2xflags(be16_to_cpu(dip->di_flags)) |
+	return _xfs_dic2xflags(be16_to_cpu(dip->di_flags),
+				be64_to_cpu(dip->di_flags2)) |
 				(XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0);
 }
 
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index f3efe9a..454d7a8 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -870,6 +870,10 @@ xfs_merge_ioc_xflags(
 		xflags |= XFS_XFLAG_NODUMP;
 	else
 		xflags &= ~XFS_XFLAG_NODUMP;
+	if (flags & FS_NOCOW_FL)
+		xflags &= ~XFS_XFLAG_REFLINK;
+	else
+		xflags |= XFS_XFLAG_REFLINK;
 
 	return xflags;
 }
@@ -1002,9 +1006,11 @@ static int
 xfs_ioctl_setattr_xflags(
 	struct xfs_trans	*tp,
 	struct xfs_inode	*ip,
-	struct fsxattr		*fa)
+	struct fsxattr		*fa,
+	struct file		*filp)
 {
 	struct xfs_mount	*mp = ip->i_mount;
+	int			error;
 
 	/* Can't change realtime flag if any extents are allocated. */
 	if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
@@ -1028,6 +1034,9 @@ xfs_ioctl_setattr_xflags(
 		return -EPERM;
 
+	error = xfs_reflink_end_unshare(ip, fa->fsx_xflags);
+	if (error)
+		return error;
 	xfs_set_diflags(ip, fa->fsx_xflags);
 	xfs_diflags_to_linux(ip);
 	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
@@ -1170,7 +1179,8 @@ xfs_ioctl_setattr_check_projid(
 STATIC int
 xfs_ioctl_setattr(
 	xfs_inode_t		*ip,
-	struct fsxattr		*fa)
+	struct fsxattr		*fa,
+	struct file		*filp)
 {
 	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_trans	*tp;
@@ -1181,6 +1191,10 @@ xfs_ioctl_setattr(
 
 	trace_xfs_ioctl_setattr(ip);
 
+	code = xfs_reflink_check_flag_adjust(ip, &fa->fsx_xflags);
+	if (code)
+		return code;
+
 	code = xfs_ioctl_setattr_check_projid(ip, fa);
 	if (code)
 		return code;
@@ -1201,6 +1215,10 @@ xfs_ioctl_setattr(
 			return code;
 	}
 
+	code = xfs_reflink_start_unshare(ip, fa->fsx_xflags, filp);
+	if (code)
+		return code;
+
 	tp = xfs_ioctl_setattr_get_trans(ip);
 	if (IS_ERR(tp)) {
 		code = PTR_ERR(tp);
@@ -1220,7 +1238,7 @@ xfs_ioctl_setattr(
 	if (code)
 		goto error_trans_cancel;
 
-	code = xfs_ioctl_setattr_xflags(tp, ip, fa);
+	code = xfs_ioctl_setattr_xflags(tp, ip, fa, filp);
 	if (code)
 		goto error_trans_cancel;
 
@@ -1290,7 +1308,7 @@ xfs_ioc_fssetxattr(
 	error = mnt_want_write_file(filp);
 	if (error)
 		return error;
-	error = xfs_ioctl_setattr(ip, &fa);
+	error = xfs_ioctl_setattr(ip, &fa, filp);
 	mnt_drop_write_file(filp);
 	return error;
 }
@@ -1303,6 +1321,7 @@ xfs_ioc_getxflags(
 	unsigned int		flags;
 
 	flags = xfs_di2lxflags(ip->i_d.di_flags);
+	xfs_reflink_get_lxflags(ip, &flags);
 	if (copy_to_user(arg, &flags, sizeof(flags)))
 		return -EFAULT;
 	return 0;
@@ -1324,22 +1343,30 @@ xfs_ioc_setxflags(
 
 	if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
 		      FS_NOATIME_FL | FS_NODUMP_FL | \
-		      FS_SYNC_FL))
+		      FS_SYNC_FL | FS_NOCOW_FL))
 		return -EOPNOTSUPP;
 
 	fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip));
 
+	error = xfs_reflink_check_flag_adjust(ip, &fa.fsx_xflags);
+	if (error)
+		return error;
+
 	error = mnt_want_write_file(filp);
 	if (error)
 		return error;
 
+	error = xfs_reflink_start_unshare(ip, fa.fsx_xflags, filp);
+	if (error)
+		goto out_drop_write;	/* undo mnt_want_write_file */
+
 	tp = xfs_ioctl_setattr_get_trans(ip);
 	if (IS_ERR(tp)) {
 		error = PTR_ERR(tp);
 		goto out_drop_write;
 	}
 
-	error = xfs_ioctl_setattr_xflags(tp, ip, &fa);
+	error = xfs_ioctl_setattr_xflags(tp, ip, &fa, filp);
 	if (error) {
 		xfs_trans_cancel(tp);
 		goto out_drop_write;
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index f2086f6b..af6ec92 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1555,3 +1555,337 @@ out_error:
 	trace_xfs_reflink_range_error(dest, error, _RET_IP_);
 	return error;
 }
+
+/**
+ * xfs_reflink_get_lxflags() - set reflink-related linux inode flags
+ *
+ * @ip: XFS inode
+ * @flags: Pointer to the user-visible inode flags
+ */
+void
+xfs_reflink_get_lxflags(
+	struct xfs_inode	*ip,		/* XFS inode */
+	unsigned int		*flags)		/* user flags */
+{
+	/*
+	 * If this is a reflink-capable filesystem and there are no shared
+	 * blocks, then this is a "nocow" file.
+	 */
+	if (!xfs_sb_version_hasreflink(&ip->i_mount->m_sb) ||
+	    xfs_is_reflink_inode(ip))
+		return;
+	*flags |= FS_NOCOW_FL;
+}
+
+
+/**
+ * xfs_reflink_dirty_range() -- Dirty all the shared blocks in the file so that
+ * they're rewritten elsewhere. Similar to generic_perform_write().
+ *
+ * @filp: VFS file pointer
+ * @pos: offset to start dirtying
+ * @len: number of bytes to dirty
+ */
+STATIC int
+xfs_reflink_dirty_range(
+	struct file		*filp,
+	xfs_off_t		pos,
+	xfs_off_t		len)
+{
+	struct address_space	*mapping;
+	const struct address_space_operations	*a_ops;
+	int			error;
+	unsigned int		flags;
+	struct page		*page;
+	struct page		*rpage;
+	unsigned long		offset;	/* Offset into pagecache page */
+	unsigned long		bytes;	/* Bytes left in page, capped by len */
+	void			*fsdata;
+
+	mapping = filp->f_mapping;
+	a_ops = mapping->a_ops;
+	flags = AOP_FLAG_UNINTERRUPTIBLE;
+	do {
+
+		offset = (pos & (PAGE_CACHE_SIZE - 1));
+		bytes = min_t(unsigned long, len, PAGE_CACHE_SIZE - offset);
+		rpage = xfs_get_page(file_inode(filp), pos);
+		if (IS_ERR(rpage)) {
+			error = PTR_ERR(rpage);
+			break;
+		} else if (!rpage) {
+			error = -ENOMEM;
+			break;
+		}
+
+		error = a_ops->write_begin(filp, mapping, pos, bytes, flags,
+					   &page, &fsdata);
+		page_cache_release(rpage);
+		if (error < 0)
+			break;
+
+		trace_xfs_reflink_unshare_page(file_inode(filp), page,
+				pos, bytes);
+
+		if (!PageUptodate(page)) {
+			printk(KERN_ERR "%s: STALE? ino=%lu pos=%llu\n",
+				__func__, filp->f_inode->i_ino, pos);
+			WARN_ON(1);
+		}
+		if (mapping_writably_mapped(mapping))
+			flush_dcache_page(page);
+
+		error = a_ops->write_end(filp, mapping, pos, bytes, bytes,
+					 page, fsdata);
+		if (error < 0)
+			break;
+		else if (error == 0) {
+			error = -EIO;
+			break;
+		} else {
+			bytes = error;
+			error = 0;
+		}
+
+		cond_resched();
+
+		pos += bytes;
+		len -= bytes;
+
+		balance_dirty_pages_ratelimited(mapping);
+		if (fatal_signal_pending(current)) {
+			error = -EINTR;
+			break;
+		}
+	} while (len > 0);
+
+	return error;
+}
+
+/**
+ * xfs_reflink_check_flag_adjust() - the only change we allow to the inode
+ * reflink flag is to clear it when the fs supports reflink.
+ *
+ * @ip: XFS inode
+ * @xflags: XFS in-core inode flags
+ */
+int
+xfs_reflink_check_flag_adjust(
+	struct xfs_inode	*ip,
+	unsigned int		*xflags)
+{
+	unsigned int		chg;
+
+	chg = !!(*xflags & XFS_XFLAG_REFLINK) ^ !!xfs_is_reflink_inode(ip);
+
+	if (!chg)
+		return 0;
+	if (!xfs_sb_version_hasreflink(&ip->i_mount->m_sb))
+		return -EOPNOTSUPP;
+	if (*xflags & XFS_XFLAG_REFLINK) {
+		*xflags &= ~XFS_XFLAG_REFLINK;
+		return 0;
+	}
+	return 0;
+}
+
+/**
+ * xfs_reflink_start_unshare() - dirty all the shared blocks so that they
+ * can be reallocated elsewhere, in preparation for clearing the reflink
+ * hint.
+ *
+ * @ip: XFS inode
+ * @xflags: XFS in-core inode flags
+ * @filp: VFS file structure
+ */
+int
+xfs_reflink_start_unshare(
+	struct xfs_inode	*ip,
+	unsigned int		xflags,
+	struct file		*filp)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	int			error = 0;
+	xfs_fileoff_t		fbno;
+	xfs_filblks_t		end;
+	xfs_agnumber_t		agno;
+	xfs_agblock_t		agbno;
+	xfs_extlen_t		len;
+	xfs_nlink_t		nr;
+	xfs_off_t		isize;
+	xfs_off_t		fpos;
+	xfs_off_t		flen;
+	struct xfs_bmbt_irec	map[2];
+	int			nmaps;
+
+	if (!xfs_sb_version_hasreflink(&ip->i_mount->m_sb) ||
+	    (xflags & XFS_XFLAG_REFLINK) ||
+	    !xfs_is_reflink_inode(ip))
+		return 0;
+
+	inode_dio_wait(VFS_I(ip));
+
+	/*
+	 * The user wants to preemptively CoW all shared blocks in this file,
+	 * which enables us to turn off the reflink flag. Iterate all
+	 * extents which are not prealloc/delalloc to see which ranges are
+	 * mentioned in the refcount tree, then read those blocks into the
+	 * pagecache, dirty them, fsync them back out, and then we can update
+	 * the inode flag. What happens if we run out of memory? :)
+	 */
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	fbno = 0;
+	isize = i_size_read(VFS_I(ip));
+	if (isize == 0) {
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		return 0;
+	}
+
+	trace_xfs_reflink_start_unshare(ip);
+
+	end = XFS_B_TO_FSB(mp, isize);
+	while (end - fbno > 0) {
+		nmaps = 1;
+		/*
+		 * Look for extents in the file. Skip holes, delalloc, or
+		 * unwritten extents; they can't be reflinked.
+		 */
+		error = xfs_bmapi_read(ip, fbno, end - fbno, map, &nmaps, 0);
+		if (error)
+			goto out_unlock;
+		if (nmaps == 0)
+			break;
+		if (map[0].br_startblock == HOLESTARTBLOCK ||
+		    map[0].br_startblock == DELAYSTARTBLOCK ||
+		    ISUNWRITTEN(&map[0]))
+			goto next;
+
+		map[1] = map[0];
+		while (map[1].br_blockcount) {
+			agno = XFS_FSB_TO_AGNO(mp, map[1].br_startblock);
+			agbno = XFS_FSB_TO_AGBNO(mp, map[1].br_startblock);
+			CHECK_AG_NUMBER(mp, agno);
+			CHECK_AG_EXTENT(mp, agbno, 1);
+
+			error = xfs_reflink_get_refcount(mp, agno, agbno,
+					&len, &nr);
+			if (error)
+				goto out_unlock;
+			XFS_WANT_CORRUPTED_GOTO(mp, len != 0, out_unlock);
+			if (len > map[1].br_blockcount)
+				len = map[1].br_blockcount;
+			if (nr < 2)
+				goto skip_copy;
+			xfs_iunlock(ip, XFS_ILOCK_EXCL);
+			fpos = XFS_FSB_TO_B(mp, map[1].br_startoff);
+			flen = XFS_FSB_TO_B(mp, len);
+			if (fpos + flen > isize)
+				flen = isize - fpos;
+			error = xfs_reflink_dirty_range(filp, fpos, flen);
+			xfs_ilock(ip, XFS_ILOCK_EXCL);
+			if (error)
+				goto out_unlock;
+skip_copy:
+			map[1].br_blockcount -= len;
+			map[1].br_startoff += len;
+			map[1].br_startblock += len;
+		}
+
+next:
+		fbno = map[0].br_startoff + map[0].br_blockcount;
+	}
+
+out_unlock:
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	if (error == 0)
+		error = filemap_write_and_wait(filp->f_mapping);
+	else
+		trace_xfs_reflink_start_unshare_error(ip, error, _RET_IP_);
+	return error;
+}
+
+/**
+ * xfs_reflink_end_unshare() - finish removing reflink flag from inode
+ *
+ * @ip: XFS inode
+ * @xflags: XFS in-core inode flags
+ */
+int						/* error */
+xfs_reflink_end_unshare(
+	struct xfs_inode	*ip,		/* XFS inode */
+	unsigned int		xflags)		/* XFS in-core inode flags */
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	int			error = 0;	/* loop below may not run */
+	xfs_fileoff_t		fbno;
+	xfs_filblks_t		end;
+	xfs_agnumber_t		agno;
+	xfs_agblock_t		agbno;
+	xfs_extlen_t		len;
+	xfs_nlink_t		nr;
+	struct xfs_bmbt_irec	map[2];
+	int			nmaps;
+
+	if (!xfs_sb_version_hasreflink(&ip->i_mount->m_sb) ||
+	    (xflags & XFS_XFLAG_REFLINK) ||
+	    !xfs_is_reflink_inode(ip))
+		return 0;
+
+	trace_xfs_reflink_end_unshare(ip);
+
+	/*
+	 * Earlier we copied all the shared blocks in this file to new blocks.
+	 * However, we dropped the ilock before getting the transaction, so
+	 * check that nobody wandered in and added more reflinks.
+	 */
+	fbno = 0;
+	end = XFS_B_TO_FSB(mp, i_size_read(VFS_I(ip)));
+	while (end - fbno > 0) {
+		nmaps = 1;
+		/*
+		 * Look for extents in the file. We can skip the refcount
+		 * check on holes, delalloc, and unwritten extents; they can't
+		 * be reflinked.
+		 */
+		error = xfs_bmapi_read(ip, fbno, end - fbno, map, &nmaps, 0);
+		if (error)
+			goto out_unlock;
+		if (nmaps == 0)
+			break;
+		if (map[0].br_startblock == HOLESTARTBLOCK ||
+		    map[0].br_startblock == DELAYSTARTBLOCK ||
+		    ISUNWRITTEN(&map[0]))
+			goto next;
+
+		map[1] = map[0];
+		while (map[1].br_blockcount) {
+			agno = XFS_FSB_TO_AGNO(mp, map[1].br_startblock);
+			agbno = XFS_FSB_TO_AGBNO(mp, map[1].br_startblock);
+			CHECK_AG_NUMBER(mp, agno);
+			CHECK_AG_EXTENT(mp, agbno, 1);
+
+			error = xfs_reflink_get_refcount(mp, agno, agbno,
+					&len, &nr);
+			if (error)
+				goto out_unlock;
+			XFS_WANT_CORRUPTED_GOTO(mp, len != 0, out_unlock);
+			if (len > map[1].br_blockcount)
+				len = map[1].br_blockcount;
+			if (nr > 1) {
+				error = -EINTR;
+				goto out_unlock;
+			}
+			map[1].br_blockcount -= len;
+			map[1].br_startblock += len;
+		}
+
+next:
+		fbno = map[0].br_startoff + map[0].br_blockcount;
+	}
+
+	ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
+out_unlock:
+	if (error)
+		trace_xfs_reflink_end_unshare_error(ip, error, _RET_IP_);
+	return error;
+}
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index c60a9bd..aaa26ed 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -51,4 +51,11 @@ extern int xfs_reflink(struct xfs_inode *src, xfs_off_t srcoff,
 		struct xfs_inode *dest, xfs_off_t destoff, xfs_off_t len,
 		unsigned int flags);
 
+extern void xfs_reflink_get_lxflags(struct xfs_inode *ip, unsigned int *flags);
+extern int xfs_reflink_check_flag_adjust(struct xfs_inode *ip,
+		unsigned int *xflags);
+extern int xfs_reflink_start_unshare(struct xfs_inode *ip, unsigned int xflags,
+		struct file *filp);
+extern int xfs_reflink_end_unshare(struct xfs_inode *ip, unsigned int xflags);
+
 #endif /* __XFS_REFLINK_H */

_______________________________________________
xfs mailing list
xfs@xxxxxxxxxxx
http://oss.sgi.com/mailman/listinfo/xfs