On Mon, Dec 05, 2016 at 10:05:23PM +0100, Christoph Hellwig wrote: > When we allocate COW fork blocks for direct I/O writes we currently first > create a delayed allocation, and then convert it to a real allocation > once we've got the delayed one. > > As there is no good reason for that this patch instead makes use call > xfs_bmapi_write from the COW allocation path. The only interesting bits > are a few tweaks the low-level allocator to allow for this, most notably > the need to remove the call to xfs_bmap_extsize_align for the cowextsize > in xfs_bmap_btalloc - for the existing convert case it's a no-op, but > for the direct allocation case it would blow up our block reservation > way beyond what we reserved for the transaction. > > Signed-off-by: Christoph Hellwig <hch@xxxxxx> > --- > fs/xfs/libxfs/xfs_bmap.c | 9 ++--- > fs/xfs/xfs_reflink.c | 101 ++++++++++++++++++++++++++++++++++------------- > 2 files changed, 76 insertions(+), 34 deletions(-) > > diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c > index 6f28814..a4a4327 100644 > --- a/fs/xfs/libxfs/xfs_bmap.c > +++ b/fs/xfs/libxfs/xfs_bmap.c > @@ -2893,13 +2893,14 @@ xfs_bmap_add_extent_hole_real( > ASSERT(!isnullstartblock(new->br_startblock)); > ASSERT(!bma->cur || > !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); > - ASSERT(whichfork != XFS_COW_FORK); > > XFS_STATS_INC(mp, xs_add_exlist); > > state = 0; > if (whichfork == XFS_ATTR_FORK) > state |= BMAP_ATTRFORK; > + if (whichfork == XFS_COW_FORK) > + state |= BMAP_COWFORK; > > /* > * Check and set flags if this segment has a left neighbor. > @@ -3619,9 +3620,7 @@ xfs_bmap_btalloc( > else if (mp->m_dalign) > stripe_align = mp->m_dalign; > > - if (ap->flags & XFS_BMAPI_COWFORK) > - align = xfs_get_cowextsz_hint(ap->ip); > - else if (xfs_alloc_is_userdata(ap->datatype)) > + if (xfs_alloc_is_userdata(ap->datatype)) Doesn't this defeat the purpose (i.e., fragmentation avoidance via wider allocations) of the cowextszhint for direct I/O? I think it would be better to be consistent with the approach for traditional I/O + extsz and incorporate the alignment into the reservation. Perhaps the hunk of code that already does just that in xfs_iomap_write_direct() could be converted to a small helper and reused..? > align = xfs_get_extsz_hint(ap->ip); > if (unlikely(align)) { > error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, > @@ -4608,8 +4607,6 @@ xfs_bmapi_write( > */ > if (flags & XFS_BMAPI_REMAP) > ASSERT(inhole); > - if (flags & XFS_BMAPI_COWFORK) > - ASSERT(!inhole); > > /* > * First, deal with the hole before the allocated space > diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c > index 88fd03c..0a077e80 100644 > --- a/fs/xfs/xfs_reflink.c > +++ b/fs/xfs/xfs_reflink.c > @@ -304,59 +304,104 @@ __xfs_reflink_allocate_cow( > xfs_fileoff_t end_fsb) > { > struct xfs_mount *mp = ip->i_mount; > - struct xfs_bmbt_irec imap; > + struct xfs_bmbt_irec imap, got; > struct xfs_defer_ops dfops; > struct xfs_trans *tp; > xfs_fsblock_t first_block; > - int nimaps = 1, error; > - bool shared; > + int nimaps, error, lockmode; > + bool shared, trimmed; > + xfs_extlen_t resblks, align; > + xfs_extnum_t idx; > > - xfs_defer_init(&dfops, &first_block); > + align = xfs_eof_alignment(ip, xfs_get_cowextsz_hint(ip)); > + if (align) > + end_fsb = roundup_64(end_fsb, align); > > - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, > - XFS_TRANS_RESERVE, &tp); > + resblks = XFS_DIOSTRAT_SPACE_RES(mp, end_fsb - *offset_fsb); > + > + error = xfs_qm_dqattach(ip, 0); This is already in the (only) caller. Brian > if (error) > return error; > > - xfs_ilock(ip, XFS_ILOCK_EXCL); > - > - /* Read extent from the source file. */ > - nimaps = 1; > - error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb, > - &imap, &nimaps, 0); > + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp); > if (error) > - goto out_unlock; > - ASSERT(nimaps == 1); > + return error; > > - error = xfs_reflink_reserve_cow(ip, &imap, &shared); > - if (error) > - goto out_trans_cancel; > + lockmode = XFS_ILOCK_EXCL; > + xfs_ilock(ip, lockmode); > > - if (!shared) { > - *offset_fsb = imap.br_startoff + imap.br_blockcount; > - goto out_trans_cancel; > + /* > + * Even if the extent is not shared we might have a preallocation for > + * it in the COW fork. If so use it. > + */ > + if (xfs_iext_lookup_extent(ip, ip->i_cowfp, *offset_fsb, &idx, &got) && > + got.br_startoff <= *offset_fsb) { > + /* If we have a real allocation in the COW fork we're done. */ > + if (!isnullstartblock(got.br_startblock)) { > + xfs_trim_extent(&got, *offset_fsb, > + end_fsb - *offset_fsb); > + *offset_fsb = got.br_startoff + got.br_blockcount; > + goto out_trans_cancel; > + } > + } else { > + nimaps = 1; > + error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb, > + &imap, &nimaps, 0); > + if (error) > + goto out_trans_cancel; > + ASSERT(nimaps == 1); > + > + /* Trim the mapping to the nearest shared extent boundary. */ > + error = xfs_reflink_trim_around_shared(ip, &imap, &shared, > + &trimmed); > + if (error) > + goto out_trans_cancel; > + > + if (!shared) { > + *offset_fsb = imap.br_startoff + imap.br_blockcount; > + goto out_trans_cancel; > + } > + > + *offset_fsb = imap.br_startoff; > + end_fsb = imap.br_startoff + imap.br_blockcount; > + if (align) > + end_fsb = roundup_64(end_fsb, align); > } > > - xfs_trans_ijoin(tp, ip, 0); > - error = xfs_bmapi_write(tp, ip, imap.br_startoff, imap.br_blockcount, > - XFS_BMAPI_COWFORK, &first_block, > - XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), > - &imap, &nimaps, &dfops); > + error = xfs_trans_reserve_quota_nblks(tp, ip, resblks, 0, > + XFS_QMOPT_RES_REGBLKS); > if (error) > goto out_trans_cancel; > > + xfs_trans_ijoin(tp, ip, 0); > + > + xfs_defer_init(&dfops, &first_block); > + nimaps = 1; > + error = xfs_bmapi_write(tp, ip, *offset_fsb, end_fsb - *offset_fsb, > + XFS_BMAPI_COWFORK, &first_block, resblks, &imap, > + &nimaps, &dfops); > + if (error) > + goto out_bmap_cancel; > + > error = xfs_defer_finish(&tp, &dfops, NULL); > if (error) > - goto out_trans_cancel; > + goto out_bmap_cancel; > > error = xfs_trans_commit(tp); > + if (error) > + goto out_unlock; > > *offset_fsb = imap.br_startoff + imap.br_blockcount; > + > out_unlock: > - xfs_iunlock(ip, XFS_ILOCK_EXCL); > + xfs_iunlock(ip, lockmode); > return error; > -out_trans_cancel: > + > +out_bmap_cancel: > xfs_defer_cancel(&dfops); > + xfs_trans_unreserve_quota_nblks(tp, ip, (long)resblks, 0, > + XFS_QMOPT_RES_REGBLKS); > +out_trans_cancel: > xfs_trans_cancel(tp); > goto out_unlock; > } > -- > 2.1.4 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html