On Thu, Sep 29, 2016 at 08:09:01PM -0700, Darrick J. Wong wrote: > Wire up iomap_begin to detect shared extents and create delayed allocation > extents in the CoW fork: > > 1) Check if we already have an extent in the COW fork for the area. > If so, there is nothing to do and we can move along. > 2) Look up block number for the current extent, and if there is none, > it's not shared, so move along. > 3) Unshare the current extent as far as we are going to write into it. > For this we avoid an additional COW fork lookup and use the > information we set aside in step 1) above. > 4) Goto 1) unless we've covered the whole range. > > Last but not least, this updates the xfs_reflink_reserve_cow_range calling > convention to pass a byte offset and length, as that is what both callers > expect anyway. This patch has been refactored considerably as part of the > iomap transition. > > Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx> > Signed-off-by: Christoph Hellwig <hch@xxxxxx> > --- > fs/xfs/xfs_iomap.c | 12 ++- > fs/xfs/xfs_reflink.c | 202 ++++++++++++++++++++++++++++++++++++++++++++++++++ > fs/xfs/xfs_reflink.h | 9 ++ > 3 files changed, 221 insertions(+), 2 deletions(-) > > > diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c > index 59c7beb..e8312b0 100644 > --- a/fs/xfs/xfs_iomap.c > +++ b/fs/xfs/xfs_iomap.c > @@ -39,6 +39,7 @@ > #include "xfs_quota.h" > #include "xfs_dquot_item.h" > #include "xfs_dquot.h" > +#include "xfs_reflink.h" > > > #define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \ > @@ -961,8 +962,15 @@ xfs_file_iomap_begin( > if (XFS_FORCED_SHUTDOWN(mp)) > return -EIO; > > - if ((flags & IOMAP_WRITE) && > - !IS_DAX(inode) && !xfs_get_extsz_hint(ip)) { > + if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) { > + error = xfs_reflink_reserve_cow_range(ip, offset, length); > + if (error < 0) > + return error; > + } > + > + if ((flags & IOMAP_WRITE) && !IS_DAX(inode) && > + !xfs_get_extsz_hint(ip)) { > + /* Reserve delalloc blocks for 
regular writeback. */ > return xfs_file_iomap_begin_delay(inode, offset, length, flags, > iomap); > } What about the short write case? E.g., do we have to clear out delalloc blocks from the cow fork in iomap_end() if we don't end up using them? > diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c > index 7adbb83..05a7fe6 100644 > --- a/fs/xfs/xfs_reflink.c > +++ b/fs/xfs/xfs_reflink.c > @@ -51,6 +51,7 @@ > #include "xfs_btree.h" > #include "xfs_bmap_btree.h" > #include "xfs_reflink.h" > +#include "xfs_iomap.h" > > /* > * Copy on Write of Shared Blocks > @@ -112,3 +113,204 @@ > * ioend structure. Better yet, the more ground we can cover with one > * ioend, the better. > */ > + > +/* > + * Given an AG extent, find the lowest-numbered run of shared blocks within > + * that range and return the range in fbno/flen. > + */ > +int > +xfs_reflink_find_shared( > + struct xfs_mount *mp, > + xfs_agnumber_t agno, > + xfs_agblock_t agbno, > + xfs_extlen_t aglen, > + xfs_agblock_t *fbno, > + xfs_extlen_t *flen, > + bool find_maximal) > +{ > + struct xfs_buf *agbp; > + struct xfs_btree_cur *cur; > + int error; > + > + error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp); > + if (error) > + return error; > + > + cur = xfs_refcountbt_init_cursor(mp, NULL, agbp, agno, NULL); > + > + error = xfs_refcount_find_shared(cur, agbno, aglen, fbno, flen, > + find_maximal); > + > + xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); > + > + xfs_buf_relse(agbp); > + return error; > +} > + > +/* > + * Trim the mapping to the next block where there's a change in the > + * shared/unshared status. More specifically, this means that we > + * find the lowest-numbered extent of shared blocks that coincides with > + * the given block mapping. If the shared extent overlaps the start of > + * the mapping, trim the mapping to the end of the shared extent. If > + * the shared region intersects the mapping, trim the mapping to the > + * start of the shared extent. 
If there are no shared regions that > + * overlap, just return the original extent. > + */ > +int > +xfs_reflink_trim_around_shared( > + struct xfs_inode *ip, > + struct xfs_bmbt_irec *irec, > + bool *shared, > + bool *trimmed) > +{ > + xfs_agnumber_t agno; > + xfs_agblock_t agbno; > + xfs_extlen_t aglen; > + xfs_agblock_t fbno; > + xfs_extlen_t flen; > + int error = 0; > + > + /* Holes, unwritten, and delalloc extents cannot be shared */ > + if (!xfs_is_reflink_inode(ip) || > + ISUNWRITTEN(irec) || > + irec->br_startblock == HOLESTARTBLOCK || > + irec->br_startblock == DELAYSTARTBLOCK) { > + *shared = false; > + return 0; > + } > + > + trace_xfs_reflink_trim_around_shared(ip, irec); > + > + agno = XFS_FSB_TO_AGNO(ip->i_mount, irec->br_startblock); > + agbno = XFS_FSB_TO_AGBNO(ip->i_mount, irec->br_startblock); > + aglen = irec->br_blockcount; > + > + error = xfs_reflink_find_shared(ip->i_mount, agno, agbno, > + aglen, &fbno, &flen, true); > + if (error) > + return error; > + > + *shared = *trimmed = false; > + if (flen == 0) { Preferable to use NULLAGBLOCK for this, imo. > + /* No shared blocks at all. */ > + return 0; > + } else if (fbno == agbno) { > + /* The start of this extent is shared. */ > + irec->br_blockcount = flen; > + *shared = true; > + *trimmed = true; Why do we set trimmed based solely on fbno == agbno? Is that valid if the bmapbt extent exactly matches the refcntbt extent and we thus don't actually modify the extent (e.g., br_blockcount == flen)? It's hard to tell because trimmed looks unused (to this point?), so I could just misunderstand the meaning. > + return 0; > + } else { > + /* There's a shared extent midway through this extent. */ > + irec->br_blockcount = fbno - agbno; Don't we have to push the startblock forward in this case? Oh, I see. We trim the unshared length to push the fileoffset fsb to the start of the shared region for the next iteration. 
Brian > + *trimmed = true; > + return 0; > + } > +} > + > +/* Create a CoW reservation for a range of blocks within a file. */ > +static int > +__xfs_reflink_reserve_cow( > + struct xfs_inode *ip, > + xfs_fileoff_t *offset_fsb, > + xfs_fileoff_t end_fsb) > +{ > + struct xfs_bmbt_irec got, prev, imap; > + xfs_fileoff_t orig_end_fsb; > + int nimaps, eof = 0, error = 0; > + bool shared = false, trimmed = false; > + xfs_extnum_t idx; > + > + /* Already reserved? Skip the refcount btree access. */ > + xfs_bmap_search_extents(ip, *offset_fsb, XFS_COW_FORK, &eof, &idx, > + &got, &prev); > + if (!eof && got.br_startoff <= *offset_fsb) { > + end_fsb = orig_end_fsb = got.br_startoff + got.br_blockcount; > + trace_xfs_reflink_cow_found(ip, &got); > + goto done; > + } > + > + /* Read extent from the source file. */ > + nimaps = 1; > + error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb, > + &imap, &nimaps, 0); > + if (error) > + goto out_unlock; > + ASSERT(nimaps == 1); > + > + /* Trim the mapping to the nearest shared extent boundary. */ > + error = xfs_reflink_trim_around_shared(ip, &imap, &shared, &trimmed); > + if (error) > + goto out_unlock; > + > + end_fsb = orig_end_fsb = imap.br_startoff + imap.br_blockcount; > + > + /* Not shared? Just report the (potentially capped) extent. */ > + if (!shared) > + goto done; > + > + /* > + * Fork all the shared blocks from our write offset until the end of > + * the extent. 
> + */ > + error = xfs_qm_dqattach_locked(ip, 0); > + if (error) > + goto out_unlock; > + > +retry: > + error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, *offset_fsb, > + end_fsb - *offset_fsb, &got, > + &prev, &idx, eof); > + switch (error) { > + case 0: > + break; > + case -ENOSPC: > + case -EDQUOT: > + /* retry without any preallocation */ > + trace_xfs_reflink_cow_enospc(ip, &imap); > + if (end_fsb != orig_end_fsb) { > + end_fsb = orig_end_fsb; > + goto retry; > + } > + /*FALLTHRU*/ > + default: > + goto out_unlock; > + } > + > + trace_xfs_reflink_cow_alloc(ip, &got); > +done: > + *offset_fsb = end_fsb; > +out_unlock: > + return error; > +} > + > +/* Create a CoW reservation for part of a file. */ > +int > +xfs_reflink_reserve_cow_range( > + struct xfs_inode *ip, > + xfs_off_t offset, > + xfs_off_t count) > +{ > + struct xfs_mount *mp = ip->i_mount; > + xfs_fileoff_t offset_fsb, end_fsb; > + int error; > + > + trace_xfs_reflink_reserve_cow_range(ip, offset, count); > + > + offset_fsb = XFS_B_TO_FSBT(mp, offset); > + end_fsb = XFS_B_TO_FSB(mp, offset + count); > + > + xfs_ilock(ip, XFS_ILOCK_EXCL); > + while (offset_fsb < end_fsb) { > + error = __xfs_reflink_reserve_cow(ip, &offset_fsb, end_fsb); > + if (error) { > + trace_xfs_reflink_reserve_cow_range_error(ip, error, > + _RET_IP_); > + break; > + } > + } > + xfs_iunlock(ip, XFS_ILOCK_EXCL); > + > + return error; > +} > diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h > index 820b151..f824f87 100644 > --- a/fs/xfs/xfs_reflink.h > +++ b/fs/xfs/xfs_reflink.h > @@ -20,4 +20,13 @@ > #ifndef __XFS_REFLINK_H > #define __XFS_REFLINK_H 1 > > +extern int xfs_reflink_find_shared(struct xfs_mount *mp, xfs_agnumber_t agno, > + xfs_agblock_t agbno, xfs_extlen_t aglen, xfs_agblock_t *fbno, > + xfs_extlen_t *flen, bool find_maximal); > +extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip, > + struct xfs_bmbt_irec *irec, bool *shared, bool *trimmed); > + > +extern int xfs_reflink_reserve_cow_range(struct 
xfs_inode *ip, > + xfs_off_t offset, xfs_off_t count); > + > #endif /* __XFS_REFLINK_H */ > > -- > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html