On Thu, Sep 29, 2016 at 08:10:05PM -0700, Darrick J. Wong wrote: > Reflink extents from one file to another; that is to say, iteratively > remove the mappings from the destination file, copy the mappings from > the source file to the destination file, and increment the reference > count of all the blocks that got remapped. > > Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx> > --- > v2: Call xfs_defer_cancel before cancelling the transaction if the > remap operation fails. Use the deferred operations system to avoid > deadlocks or blowing out the transaction reservation, and make the > entire reflink operation atomic for each extent being remapped. The > destination file's i_size will be updated if necessary to avoid > violating the assumption that there are no shared blocks past the EOF > block. > --- > fs/xfs/xfs_reflink.c | 425 ++++++++++++++++++++++++++++++++++++++++++++++++++ > fs/xfs/xfs_reflink.h | 2 > 2 files changed, 427 insertions(+) > > > diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c > index 673ecc1..94c19fff 100644 > --- a/fs/xfs/xfs_reflink.c > +++ b/fs/xfs/xfs_reflink.c > @@ -922,3 +922,428 @@ xfs_reflink_recover_cow( > > return error; > } ... > +/* > + * Unmap a range of blocks from a file, then map other blocks into the hole. > + * The range to unmap is (destoff : destoff + srcioff + irec->br_blockcount). > + * The extent irec is mapped into dest at irec->br_startoff. 
> + */ > +STATIC int > +xfs_reflink_remap_extent( > + struct xfs_inode *ip, > + struct xfs_bmbt_irec *irec, > + xfs_fileoff_t destoff, > + xfs_off_t new_isize) > +{ > + struct xfs_mount *mp = ip->i_mount; > + struct xfs_trans *tp; > + xfs_fsblock_t firstfsb; > + unsigned int resblks; > + struct xfs_defer_ops dfops; > + struct xfs_bmbt_irec uirec; > + bool real_extent; > + xfs_filblks_t rlen; > + xfs_filblks_t unmap_len; > + xfs_off_t newlen; > + int error; > + > + unmap_len = irec->br_startoff + irec->br_blockcount - destoff; > + trace_xfs_reflink_punch_range(ip, destoff, unmap_len); > + > + /* Only remap normal extents. */ > + real_extent = (irec->br_startblock != HOLESTARTBLOCK && > + irec->br_startblock != DELAYSTARTBLOCK && > + !ISUNWRITTEN(irec)); > + > + /* Start a rolling transaction to switch the mappings */ > + resblks = XFS_EXTENTADD_SPACE_RES(ip->i_mount, XFS_DATA_FORK); > + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp); > + if (error) > + goto out; > + > + xfs_ilock(ip, XFS_ILOCK_EXCL); > + xfs_trans_ijoin(tp, ip, 0); > + > + /* If we're not just clearing space, then do we have enough quota? */ > + if (real_extent) { > + error = xfs_trans_reserve_quota_nblks(tp, ip, > + irec->br_blockcount, 0, XFS_QMOPT_RES_REGBLKS); > + if (error) > + goto out_cancel; > + } > + > + trace_xfs_reflink_remap(ip, irec->br_startoff, > + irec->br_blockcount, irec->br_startblock); > + > + /* Unmap the old blocks in the data fork. */ > + rlen = unmap_len; > + while (rlen) { > + xfs_defer_init(&dfops, &firstfsb); > + error = __xfs_bunmapi(tp, ip, destoff, &rlen, 0, 1, > + &firstfsb, &dfops); > + if (error) > + goto out_defer; > + > + /* Trim the extent to whatever got unmapped. */ > + uirec = *irec; > + xfs_trim_extent(&uirec, destoff + rlen, unmap_len - rlen); > + unmap_len = rlen; > + > + /* If this isn't a real mapping, we're done. 
*/ > + if (!real_extent || uirec.br_blockcount == 0) > + goto next_extent; > + Any reason we couldn't reuse existing mechanisms for this? E.g., hole punch the dest file range before we remap the source file extents. That might change behavior in the event of a partial/failed reflink, but it's not clear to me that matters. > + trace_xfs_reflink_remap(ip, uirec.br_startoff, > + uirec.br_blockcount, uirec.br_startblock); > + ... > +} > + > +/* > + * Iteratively remap one file's extents (and holes) to another's. > + */ > +STATIC int > +xfs_reflink_remap_blocks( > + struct xfs_inode *src, > + xfs_fileoff_t srcoff, > + struct xfs_inode *dest, > + xfs_fileoff_t destoff, > + xfs_filblks_t len, > + xfs_off_t new_isize) > +{ > + struct xfs_bmbt_irec imap; > + int nimaps; > + int error = 0; > + xfs_filblks_t range_len; > + > + /* drange = (destoff, destoff + len); srange = (srcoff, srcoff + len) */ > + while (len) { > + trace_xfs_reflink_remap_blocks_loop(src, srcoff, len, > + dest, destoff); > + /* Read extent from the source file */ > + nimaps = 1; > + xfs_ilock(src, XFS_ILOCK_EXCL); > + error = xfs_bmapi_read(src, srcoff, len, &imap, &nimaps, 0); > + xfs_iunlock(src, XFS_ILOCK_EXCL); > + if (error) > + goto err; > + ASSERT(nimaps == 1); > + > + trace_xfs_reflink_remap_imap(src, srcoff, len, XFS_IO_OVERWRITE, > + &imap); > + > + /* Translate imap into the destination file. */ > + range_len = imap.br_startoff + imap.br_blockcount - srcoff; > + imap.br_startoff += destoff - srcoff; > + Just FYI... these are all unsigned vars, so destoff - srcoff wraps around whenever destoff < srcoff (the += then wraps back to the intended offset, assuming the types are the same width). Brian > + /* Clear dest from destoff to the end of imap and map it in. 
*/ > + error = xfs_reflink_remap_extent(dest, &imap, destoff, > + new_isize); > + if (error) > + goto err; > + > + if (fatal_signal_pending(current)) { > + error = -EINTR; > + goto err; > + } > + > + /* Advance drange/srange */ > + srcoff += range_len; > + destoff += range_len; > + len -= range_len; > + } > + > + return 0; > + > +err: > + trace_xfs_reflink_remap_blocks_error(dest, error, _RET_IP_); > + return error; > +} > + > +/* > + * Link a range of blocks from one file to another. > + */ > +int > +xfs_reflink_remap_range( > + struct xfs_inode *src, > + xfs_off_t srcoff, > + struct xfs_inode *dest, > + xfs_off_t destoff, > + xfs_off_t len) > +{ > + struct xfs_mount *mp = src->i_mount; > + xfs_fileoff_t sfsbno, dfsbno; > + xfs_filblks_t fsblen; > + int error; > + > + if (!xfs_sb_version_hasreflink(&mp->m_sb)) > + return -EOPNOTSUPP; > + > + if (XFS_FORCED_SHUTDOWN(mp)) > + return -EIO; > + > + /* Don't reflink realtime inodes */ > + if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest)) > + return -EINVAL; > + > + trace_xfs_reflink_remap_range(src, srcoff, len, dest, destoff); > + > + /* Lock both files against IO */ > + if (src->i_ino == dest->i_ino) { > + xfs_ilock(src, XFS_IOLOCK_EXCL); > + xfs_ilock(src, XFS_MMAPLOCK_EXCL); > + } else { > + xfs_lock_two_inodes(src, dest, XFS_IOLOCK_EXCL); > + xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL); > + } > + > + error = xfs_reflink_set_inode_flag(src, dest); > + if (error) > + goto out_error; > + > + /* > + * Invalidate the page cache so that we can clear any CoW mappings > + * in the destination file. 
> + */ > + truncate_inode_pages_range(&VFS_I(dest)->i_data, destoff, > + PAGE_ALIGN(destoff + len) - 1); > + > + dfsbno = XFS_B_TO_FSBT(mp, destoff); > + sfsbno = XFS_B_TO_FSBT(mp, srcoff); > + fsblen = XFS_B_TO_FSB(mp, len); > + error = xfs_reflink_remap_blocks(src, sfsbno, dest, dfsbno, fsblen, > + destoff + len); > + if (error) > + goto out_error; > + > + error = xfs_reflink_update_dest(dest, destoff + len); > + if (error) > + goto out_error; > + > +out_error: > + xfs_iunlock(src, XFS_MMAPLOCK_EXCL); > + xfs_iunlock(src, XFS_IOLOCK_EXCL); > + if (src->i_ino != dest->i_ino) { > + xfs_iunlock(dest, XFS_MMAPLOCK_EXCL); > + xfs_iunlock(dest, XFS_IOLOCK_EXCL); > + } > + if (error) > + trace_xfs_reflink_remap_range_error(dest, error, _RET_IP_); > + return error; > +} > diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h > index 1d2f180..c35ce29 100644 > --- a/fs/xfs/xfs_reflink.h > +++ b/fs/xfs/xfs_reflink.h > @@ -43,5 +43,7 @@ extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset, > extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset, > xfs_off_t count); > extern int xfs_reflink_recover_cow(struct xfs_mount *mp); > +extern int xfs_reflink_remap_range(struct xfs_inode *src, xfs_off_t srcoff, > + struct xfs_inode *dest, xfs_off_t destoff, xfs_off_t len); > > #endif /* __XFS_REFLINK_H */ > > -- > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html