On Mon, Dec 03, 2018 at 05:25:00PM -0500, Christoph Hellwig wrote: > Besides simplifying the code a bit this allows to actually implement > the behavior of using COW preallocation for non-COW data mentioned > in the current comments. > > Note that this breaks the current version of xfs/420, but that is > because the test is broken. A separate fix will be sent for it. (Still waiting for this...) > Signed-off-by: Christoph Hellwig <hch@xxxxxx> > --- > fs/xfs/xfs_iomap.c | 132 ++++++++++++++++++++++++++++++------------- > fs/xfs/xfs_reflink.c | 67 ---------------------- > fs/xfs/xfs_trace.h | 1 - > 3 files changed, 93 insertions(+), 107 deletions(-) > > diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c > index d851abac16a9..d19f99e5476a 100644 > --- a/fs/xfs/xfs_iomap.c > +++ b/fs/xfs/xfs_iomap.c > @@ -534,15 +534,16 @@ xfs_file_iomap_begin_delay( > { > struct xfs_inode *ip = XFS_I(inode); > struct xfs_mount *mp = ip->i_mount; > - struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); > xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); > xfs_fileoff_t maxbytes_fsb = > XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); > xfs_fileoff_t end_fsb; > - int error = 0, eof = 0; > - struct xfs_bmbt_irec got; > - struct xfs_iext_cursor icur; > + struct xfs_bmbt_irec imap, cmap; > + struct xfs_iext_cursor icur, ccur; > xfs_fsblock_t prealloc_blocks = 0; > + bool eof = false, cow_eof = false, shared; > + int whichfork = XFS_DATA_FORK; > + int error = 0; > > ASSERT(!XFS_IS_REALTIME_INODE(ip)); > ASSERT(!xfs_get_extsz_hint(ip)); > @@ -560,7 +561,7 @@ xfs_file_iomap_begin_delay( > > XFS_STATS_INC(mp, xs_blk_mapw); > > - if (!(ifp->if_flags & XFS_IFEXTENTS)) { > + if (!(ip->i_df.if_flags & XFS_IFEXTENTS)) { > error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK); > if (error) > goto out_unlock; > @@ -568,51 +569,92 @@ xfs_file_iomap_begin_delay( > > end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb); > > - eof = !xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got); > + /* > + * Search the data fork fork first to look up our source mapping. We > + * always need the data fork map, as we have to return it to the > + * iomap code so that the higher level write code can read data in to > + * perform read-modify-write cycles for unaligned writes. > + */ > + eof = !xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap); > if (eof) > - got.br_startoff = end_fsb; /* fake hole until the end */ > + imap.br_startoff = end_fsb; /* fake hole until the end */ > > - if (got.br_startoff <= offset_fsb) { > + /* We never need to allocate blocks for zeroing a hole. */ > + if ((flags & IOMAP_ZERO) && imap.br_startoff > offset_fsb) { > + xfs_hole_to_iomap(ip, iomap, offset_fsb, imap.br_startoff); > + goto out_unlock; > + } > + > + /* > + * Search the COW fork extent list even if we did not find a data fork > + * extent. This serves two purposes: first this implements the > + * speculative preallocation using cowextisze, so that we also unshare "cowextsize" > + * block adjacent to shared blocks instead of just the shared blocks > + * themselves. Second the lookup in the extent list is generally faster > + * than going out to the shared extent tree. > + */ > + if (xfs_is_reflink_inode(ip)) { > + cow_eof = !xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, > + &ccur, &cmap); > + if (!cow_eof && cmap.br_startoff <= offset_fsb) { > + trace_xfs_reflink_cow_found(ip, &cmap); > + whichfork = XFS_COW_FORK; > + goto done; > + } > + } > + > + if (imap.br_startoff <= offset_fsb) { > /* > * For reflink files we may need a delalloc reservation when > * overwriting shared extents. This includes zeroing of > * existing extents that contain data. > */ > - if (xfs_is_reflink_inode(ip) && > - ((flags & IOMAP_WRITE) || > - got.br_state != XFS_EXT_UNWRITTEN)) { > - xfs_trim_extent(&got, offset_fsb, end_fsb - offset_fsb); > - error = xfs_reflink_reserve_cow(ip, &got); > - if (error) > - goto out_unlock; > + if (!xfs_is_reflink_inode(ip) || > + ((flags & IOMAP_ZERO) && imap.br_state != XFS_EXT_NORM)) { > + trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK, > + &imap); > + goto done; > } > > - trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK, &got); > - goto done; > - } > + xfs_trim_extent(&imap, offset_fsb, end_fsb - offset_fsb); > > - if (flags & IOMAP_ZERO) { > - xfs_hole_to_iomap(ip, iomap, offset_fsb, got.br_startoff); > - goto out_unlock; > + /* Trim the mapping to the nearest shared extent boundary. */ > + error = xfs_reflink_trim_around_shared(ip, &imap, &shared); > + if (error) > + goto out_unlock; > + > + /* Not shared? Just report the (potentially capped) extent. */ > + if (!shared) { > + trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK, > + &imap); > + goto done; > + } > + > + /* > + * Fork all the shared blocks from our write offset until the > + * end of the extent. > + */ > + whichfork = XFS_COW_FORK; > + end_fsb = imap.br_startoff + imap.br_blockcount; > + } else { > + /* > + * We cap the maximum length we map here to MAX_WRITEBACK_PAGES > + * pages to keep the chunks of work done where somewhat > + * symmetric with the work writeback does. This is a completely > + * arbitrary number pulled out of thin air. > + * > + * Note that the values needs to be less than 32-bits wide until > + * the lower level functions are updated. > + */ > + count = min_t(loff_t, count, 1024 * PAGE_SIZE); > + end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb); > } > > error = xfs_qm_dqattach_locked(ip, false); > if (error) > goto out_unlock; > > - /* > - * We cap the maximum length we map here to MAX_WRITEBACK_PAGES pages > - * to keep the chunks of work done where somewhat symmetric with the > - * work writeback does. This is a completely arbitrary number pulled > - * out of thin air as a best guess for initial testing. > - * > - * Note that the values needs to be less than 32-bits wide until > - * the lower level functions are updated. > - */ > - count = min_t(loff_t, count, 1024 * PAGE_SIZE); > - end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb); > - > - if (eof) { > + if (eof && whichfork == XFS_DATA_FORK) { > prealloc_blocks = xfs_iomap_prealloc_size(ip, offset, count, > &icur); > if (prealloc_blocks) { > @@ -635,9 +677,11 @@ xfs_file_iomap_begin_delay( > } > > retry: > - error = xfs_bmapi_reserve_delalloc(ip, XFS_DATA_FORK, offset_fsb, > - end_fsb - offset_fsb, prealloc_blocks, &got, &icur, > - eof); > + error = xfs_bmapi_reserve_delalloc(ip, whichfork, offset_fsb, > + end_fsb - offset_fsb, prealloc_blocks, > + whichfork == XFS_DATA_FORK ? &imap : &cmap, > + whichfork == XFS_DATA_FORK ? &icur : &ccur, > + whichfork == XFS_DATA_FORK ? eof : cow_eof); > switch (error) { > case 0: > break; > @@ -659,9 +703,19 @@ xfs_file_iomap_begin_delay( > * them out if the write happens to fail. > */ > iomap->flags |= IOMAP_F_NEW; > - trace_xfs_iomap_alloc(ip, offset, count, XFS_DATA_FORK, &got); > + trace_xfs_iomap_alloc(ip, offset, count, whichfork, &imap); I'm confused by this, if whichfork == COW then won't ftrace report results from the wrong fork? --D > done: > - error = xfs_bmbt_to_iomap(ip, iomap, &got, false); > + if (whichfork == XFS_COW_FORK) { > + if (imap.br_startoff > offset_fsb) { > + xfs_trim_extent(&cmap, offset_fsb, > + imap.br_startoff - offset_fsb); > + error = xfs_bmbt_to_iomap(ip, iomap, &cmap, false); > + goto out_unlock; > + } > + /* ensure we only report blocks we have a reservation for */ > + xfs_trim_extent(&imap, cmap.br_startoff, cmap.br_blockcount); > + } > + error = xfs_bmbt_to_iomap(ip, iomap, &imap, false); > out_unlock: > xfs_iunlock(ip, XFS_ILOCK_EXCL); > return error; > diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c > index bdbaff1b3fb7..d59b556d42cb 100644 > --- a/fs/xfs/xfs_reflink.c > +++ b/fs/xfs/xfs_reflink.c > @@ -234,73 +234,6 @@ xfs_reflink_trim_around_shared( > } > } > > -/* > - * Trim the passed in imap to the next shared/unshared extent boundary, and > - * if imap->br_startoff points to a shared extent reserve space for it in the > - * COW fork. > - * > - * Note that imap will always contain the block numbers for the existing blocks > - * in the data fork, as the upper layers need them for read-modify-write > - * operations. > - */ > -int > -xfs_reflink_reserve_cow( > - struct xfs_inode *ip, > - struct xfs_bmbt_irec *imap) > -{ > - struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); > - struct xfs_bmbt_irec got; > - int error = 0; > - bool eof = false; > - struct xfs_iext_cursor icur; > - bool shared; > - > - /* > - * Search the COW fork extent list first. This serves two purposes: > - * first this implement the speculative preallocation using cowextisze, > - * so that we also unshared block adjacent to shared blocks instead > - * of just the shared blocks themselves. Second the lookup in the > - * extent list is generally faster than going out to the shared extent > - * tree. > - */ > - > - if (!xfs_iext_lookup_extent(ip, ifp, imap->br_startoff, &icur, &got)) > - eof = true; > - if (!eof && got.br_startoff <= imap->br_startoff) { > - trace_xfs_reflink_cow_found(ip, imap); > - xfs_trim_extent(imap, got.br_startoff, got.br_blockcount); > - return 0; > - } > - > - /* Trim the mapping to the nearest shared extent boundary. */ > - error = xfs_reflink_trim_around_shared(ip, imap, &shared); > - if (error) > - return error; > - > - /* Not shared? Just report the (potentially capped) extent. */ > - if (!shared) > - return 0; > - > - /* > - * Fork all the shared blocks from our write offset until the end of > - * the extent. > - */ > - error = xfs_qm_dqattach_locked(ip, false); > - if (error) > - return error; > - > - error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, imap->br_startoff, > - imap->br_blockcount, 0, &got, &icur, eof); > - if (error == -ENOSPC || error == -EDQUOT) > - trace_xfs_reflink_cow_enospc(ip, imap); > - if (error) > - return error; > - > - xfs_trim_extent(imap, got.br_startoff, got.br_blockcount); > - trace_xfs_reflink_cow_alloc(ip, &got); > - return 0; > -} > - > /* Convert part of an unwritten CoW extent to a real one. */ > STATIC int > xfs_reflink_convert_cow_extent( > diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h > index 870865913bd8..36e74fc90700 100644 > --- a/fs/xfs/xfs_trace.h > +++ b/fs/xfs/xfs_trace.h > @@ -3179,7 +3179,6 @@ DEFINE_INODE_ERROR_EVENT(xfs_reflink_unshare_error); > > /* copy on write */ > DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_around_shared); > -DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_alloc); > DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_found); > DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc); > DEFINE_INODE_IREC_EVENT(xfs_reflink_convert_cow); > -- > 2.19.1 >