On Fri, Aug 25, 2017 at 11:05:57AM -0400, Brian Foster wrote: > The owner change bmbt scan that occurs during extent swap operations > does not handle ordered buffer failures. Buffers that cannot be > marked ordered must be physically logged so previously dirty ranges > of the buffer can be relogged in the transaction. > > Since the bmbt scan may need to process and potentially log a large > number of blocks, we can't expect to complete this operation in a > single transaction. Update extent swap to use a permanent > transaction with enough log reservation to physically log a buffer. > Update the bmbt scan to physically log any buffers that cannot be > ordered and to terminate the scan with -EAGAIN. On -EAGAIN, the > caller rolls the transaction and restarts the scan. Finally, update > the bmbt scan helper function to skip bmbt blocks that already match > the expected owner so they are not reprocessed after scan restarts. > > Signed-off-by: Brian Foster <bfoster@xxxxxxxxxx> Looks ok, I think... Reviewed-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx> > --- > fs/xfs/libxfs/xfs_btree.c | 26 ++++++++++++++------- > fs/xfs/xfs_bmap_util.c | 57 ++++++++++++++++++++++++++++++++++++++--------- > 2 files changed, 65 insertions(+), 18 deletions(-) > > diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c > index d06b04d..c466a23 100644 > --- a/fs/xfs/libxfs/xfs_btree.c > +++ b/fs/xfs/libxfs/xfs_btree.c > @@ -4452,10 +4452,15 @@ xfs_btree_block_change_owner( > > /* modify the owner */ > block = xfs_btree_get_block(cur, level, &bp); > - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) > + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { > + if (block->bb_u.l.bb_owner == cpu_to_be64(bbcoi->new_owner)) > + return 0; > block->bb_u.l.bb_owner = cpu_to_be64(bbcoi->new_owner); > - else > + } else { > + if (block->bb_u.s.bb_owner == cpu_to_be32(bbcoi->new_owner)) > + return 0; > block->bb_u.s.bb_owner = cpu_to_be32(bbcoi->new_owner); > + } > > /* > * If the block is a root block hosted in an inode, we might not have a > @@ -4464,14 +4469,19 @@ xfs_btree_block_change_owner( > * block is formatted into the on-disk inode fork. We still change it, > * though, so everything is consistent in memory. > */ > - if (bp) { > - if (cur->bc_tp) > - xfs_trans_ordered_buf(cur->bc_tp, bp); > - else > - xfs_buf_delwri_queue(bp, bbcoi->buffer_list); > - } else { > + if (!bp) { > ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); > ASSERT(level == cur->bc_nlevels - 1); > + return 0; > + } > + > + if (cur->bc_tp) { > + if (!xfs_trans_ordered_buf(cur->bc_tp, bp)) { > + xfs_btree_log_block(cur, bp, XFS_BB_OWNER); > + return -EAGAIN; > + } > + } else { > + xfs_buf_delwri_queue(bp, bbcoi->buffer_list); > } > > return 0; > diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c > index ee8fb9a..3e9b7a4 100644 > --- a/fs/xfs/xfs_bmap_util.c > +++ b/fs/xfs/xfs_bmap_util.c > @@ -1929,6 +1929,48 @@ xfs_swap_extent_forks( > return 0; > } > > +/* > + * Fix up the owners of the bmbt blocks to refer to the current inode. The > + * change owner scan attempts to order all modified buffers in the current > + * transaction. In the event of ordered buffer failure, the offending buffer is > + * physically logged as a fallback and the scan returns -EAGAIN. We must roll > + * the transaction in this case to replenish the fallback log reservation and > + * restart the scan. This process repeats until the scan completes. > + */ > +static int > +xfs_swap_change_owner( > + struct xfs_trans **tpp, > + struct xfs_inode *ip, > + struct xfs_inode *tmpip) > +{ > + int error; > + struct xfs_trans *tp = *tpp; > + > + do { > + error = xfs_bmbt_change_owner(tp, ip, XFS_DATA_FORK, ip->i_ino, > + NULL); > + /* success or fatal error */ > + if (error != -EAGAIN) > + break; > + > + error = xfs_trans_roll(tpp, NULL); > + if (error) > + break; > + tp = *tpp; > + > + /* > + * Redirty both inodes so they can relog and keep the log tail > + * moving forward. > + */ > + xfs_trans_ijoin(tp, ip, 0); > + xfs_trans_ijoin(tp, tmpip, 0); > + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); > + xfs_trans_log_inode(tp, tmpip, XFS_ILOG_CORE); > + } while (true); > + > + return error; > +} > + > int > xfs_swap_extents( > struct xfs_inode *ip, /* target inode */ > @@ -1943,7 +1985,7 @@ xfs_swap_extents( > int lock_flags; > struct xfs_ifork *cowfp; > uint64_t f; > - int resblks; > + int resblks = 0; > > /* > * Lock the inodes against other IO, page faults and truncate to > @@ -1991,11 +2033,8 @@ xfs_swap_extents( > XFS_SWAP_RMAP_SPACE_RES(mp, > XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK), > XFS_DATA_FORK); > - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, > - 0, 0, &tp); > - } else > - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, > - 0, 0, &tp); > + } > + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp); > if (error) > goto out_unlock; > > @@ -2087,14 +2126,12 @@ xfs_swap_extents( > * inode number of the current inode. > */ > if (src_log_flags & XFS_ILOG_DOWNER) { > - error = xfs_bmbt_change_owner(tp, ip, XFS_DATA_FORK, > - ip->i_ino, NULL); > + error = xfs_swap_change_owner(&tp, ip, tip); > if (error) > goto out_trans_cancel; > } > if (target_log_flags & XFS_ILOG_DOWNER) { > - error = xfs_bmbt_change_owner(tp, tip, XFS_DATA_FORK, > - tip->i_ino, NULL); > + error = xfs_swap_change_owner(&tp, tip, ip); > if (error) > goto out_trans_cancel; > } > -- > 2.9.5 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html