The current version of xfs_reflink_allocate_cow_range may stumble over invalid entries in the extent array given that it drops the ilock but still expects the index to be stable. Simple fixing it to a new lookup for every iteration still isn't correct given that xfs_bmapi_allocate will trigger a BUG_ON() if hitting a hole, and there is nothing preventing a xfs_bunmapi_cow call removing extents once we dropped the ilock either. The right long term implementation would be to not do a detour through a delayed allocation for direct I/O and just got straight to and on-disk allocation. Given how late it is in the merge window this patch instead duplicates the inner loop of xfs_bmapi_allocate into a helper for xfs_reflink_allocate_cow_range so that it can be done under the same ilock criticical section as our COW fork delayed allocation. Signed-off-by: Christoph Hellwig <hch@xxxxxx> --- fs/xfs/xfs_file.c | 4 -- fs/xfs/xfs_reflink.c | 134 +++++++++++++++++++++++++++++++++------------------ fs/xfs/xfs_reflink.h | 4 +- fs/xfs/xfs_trace.h | 1 - 4 files changed, 90 insertions(+), 53 deletions(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index aee0c4c..349f328 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -675,10 +675,6 @@ xfs_file_dio_aio_write( /* If this is a block-aligned directio CoW, remap immediately. */ if (xfs_is_reflink_inode(ip) && !unaligned_io) { - ret = xfs_reflink_reserve_cow_range(ip, iocb->ki_pos, count); - if (ret) - goto out; - ret = xfs_reflink_allocate_cow_range(ip, iocb->ki_pos, count); if (ret) goto out; diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 09e0e27..c18692c 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -253,7 +253,8 @@ static int __xfs_reflink_reserve_cow( struct xfs_inode *ip, xfs_fileoff_t *offset_fsb, - xfs_fileoff_t end_fsb) + xfs_fileoff_t end_fsb, + bool *skipped) { struct xfs_bmbt_irec got, prev, imap; xfs_fileoff_t orig_end_fsb; @@ -287,8 +288,10 @@ __xfs_reflink_reserve_cow( end_fsb = orig_end_fsb = imap.br_startoff + imap.br_blockcount; /* Not shared? Just report the (potentially capped) extent. */ - if (!shared) + if (!shared) { + *skipped = true; goto done; + } /* * Fork all the shared blocks from our write offset until the end of @@ -337,6 +340,7 @@ xfs_reflink_reserve_cow_range( { struct xfs_mount *mp = ip->i_mount; xfs_fileoff_t offset_fsb, end_fsb; + bool skipped = false; int error; trace_xfs_reflink_reserve_cow_range(ip, offset, count); @@ -346,7 +350,8 @@ xfs_reflink_reserve_cow_range( xfs_ilock(ip, XFS_ILOCK_EXCL); while (offset_fsb < end_fsb) { - error = __xfs_reflink_reserve_cow(ip, &offset_fsb, end_fsb); + error = __xfs_reflink_reserve_cow(ip, &offset_fsb, end_fsb, + &skipped); if (error) { trace_xfs_reflink_reserve_cow_range_error(ip, error, _RET_IP_); @@ -358,63 +363,100 @@ xfs_reflink_reserve_cow_range( return error; } -/* - * Allocate blocks to all CoW reservations within a byte range of a file. - */ -int -xfs_reflink_allocate_cow_range( +static int +__xfs_reflink_allocate_cow( struct xfs_inode *ip, - xfs_off_t pos, - xfs_off_t len) + xfs_fileoff_t *offset_fsb, + xfs_fileoff_t end_fsb) { - struct xfs_ifork *ifp; - struct xfs_bmbt_rec_host *gotp; + struct xfs_mount *mp = ip->i_mount; struct xfs_bmbt_irec imap; - int error = 0; - xfs_fileoff_t start_lblk; - xfs_fileoff_t end_lblk; - xfs_extnum_t idx; + struct xfs_defer_ops dfops; + struct xfs_trans *tp; + xfs_fsblock_t first_block; + xfs_fileoff_t next_fsb; + int nimaps = 1, error; + bool skipped = false; - if (!xfs_is_reflink_inode(ip)) - return 0; + xfs_defer_init(&dfops, &first_block); - trace_xfs_reflink_allocate_cow_range(ip, len, pos); + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, + XFS_TRANS_RESERVE, &tp); + if (error) + return error; - start_lblk = XFS_B_TO_FSBT(ip->i_mount, pos); - end_lblk = XFS_B_TO_FSB(ip->i_mount, pos + len); - ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); xfs_ilock(ip, XFS_ILOCK_EXCL); - gotp = xfs_iext_bno_to_ext(ifp, start_lblk, &idx); - while (gotp) { - xfs_bmbt_get_all(gotp, &imap); + next_fsb = *offset_fsb; + error = __xfs_reflink_reserve_cow(ip, &next_fsb, end_fsb, &skipped); + if (error) + goto out_trans_cancel; - if (imap.br_startoff >= end_lblk) - break; - if (!isnullstartblock(imap.br_startblock)) - goto advloop; - xfs_trim_extent(&imap, start_lblk, end_lblk - start_lblk); - trace_xfs_reflink_allocate_cow_extent(ip, &imap); - - xfs_iunlock(ip, XFS_ILOCK_EXCL); - error = xfs_iomap_write_allocate(ip, XFS_COW_FORK, - XFS_FSB_TO_B(ip->i_mount, imap.br_startoff + - imap.br_blockcount - 1), &imap); - xfs_ilock(ip, XFS_ILOCK_EXCL); - if (error) - break; -advloop: - /* Roll on... */ - idx++; - if (idx >= ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) - break; - gotp = xfs_iext_get_ext(ifp, idx); + if (skipped) { + *offset_fsb = next_fsb; + goto out_trans_cancel; } + xfs_trans_ijoin(tp, ip, 0); + error = xfs_bmapi_write(tp, ip, *offset_fsb, next_fsb - *offset_fsb, + XFS_BMAPI_COWFORK, &first_block, + XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), + &imap, &nimaps, &dfops); + if (error) + goto out_trans_cancel; + + /* We might not have been able to map the whole delalloc extent */ + *offset_fsb = min(*offset_fsb + imap.br_blockcount, next_fsb); + + error = xfs_defer_finish(&tp, &dfops, NULL); + if (error) + goto out_trans_cancel; + + error = xfs_trans_commit(tp); + +out_unlock: xfs_iunlock(ip, XFS_ILOCK_EXCL); + return error; +out_trans_cancel: + xfs_defer_cancel(&dfops); + xfs_trans_cancel(tp); + goto out_unlock; +} +/* + * Allocate blocks to all CoW reservations within a byte range of a file. + */ +int +xfs_reflink_allocate_cow_range( + struct xfs_inode *ip, + xfs_off_t offset, + xfs_off_t count) +{ + struct xfs_mount *mp = ip->i_mount; + xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); + xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count); + int error; + + ASSERT(xfs_is_reflink_inode(ip)); + + trace_xfs_reflink_allocate_cow_range(ip, offset, count); + + /* + * Make sure that the dquots are there. + */ + error = xfs_qm_dqattach(ip, 0); if (error) - trace_xfs_reflink_allocate_cow_range_error(ip, error, _RET_IP_); + return error; + + while (offset_fsb < end_fsb) { + error = __xfs_reflink_allocate_cow(ip, &offset_fsb, end_fsb); + if (error) { + trace_xfs_reflink_allocate_cow_range_error(ip, error, + _RET_IP_); + break; + } + } + return error; } diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h index 0e19ec6..859ca50 100644 --- a/fs/xfs/xfs_reflink.h +++ b/fs/xfs/xfs_reflink.h @@ -28,8 +28,8 @@ extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip, extern int xfs_reflink_reserve_cow_range(struct xfs_inode *ip, xfs_off_t offset, xfs_off_t count); -extern int xfs_reflink_allocate_cow_range(struct xfs_inode *ip, xfs_off_t pos, - xfs_off_t len); +extern int xfs_reflink_allocate_cow_range(struct xfs_inode *ip, + xfs_off_t offset, xfs_off_t count); extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset, struct xfs_bmbt_irec *imap, bool *need_alloc); extern int xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip, diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index c8fb91c..26113b9 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -3320,7 +3320,6 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc); DEFINE_RW_EVENT(xfs_reflink_reserve_cow_range); DEFINE_RW_EVENT(xfs_reflink_allocate_cow_range); -DEFINE_INODE_IREC_EVENT(xfs_reflink_allocate_cow_extent); DEFINE_INODE_IREC_EVENT(xfs_reflink_bounce_dio_write); DEFINE_IOMAP_EVENT(xfs_reflink_find_cow_mapping); -- 2.1.4 -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html