On Thu, Sep 29, 2016 at 08:09:14PM -0700, Darrick J. Wong wrote: > Modify the writepage handler to find and convert pending delalloc > extents to real allocations. Furthermore, when we're doing non-cow > writes to a part of a file that already has a CoW reservation (the > cowextsz hint that we set up in a subsequent patch facilitates this), > promote the write to copy-on-write so that the entire extent can get > written out as a single extent on disk, thereby reducing post-CoW > fragmentation. > > Christoph moved the CoW support code in _map_blocks to a separate helper > function, refactored other functions, and reduced the number of CoW fork > lookups, so I merged those changes here to reduce churn. > > Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx> > Signed-off-by: Christoph Hellwig <hch@xxxxxx> > --- > fs/xfs/xfs_aops.c | 106 ++++++++++++++++++++++++++++++++++++++++---------- > fs/xfs/xfs_aops.h | 4 +- > fs/xfs/xfs_reflink.c | 86 +++++++++++++++++++++++++++++++++++++++++ > fs/xfs/xfs_reflink.h | 4 ++ > 4 files changed, 178 insertions(+), 22 deletions(-) > > > diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c > index 007a520..7b1e9de 100644 > --- a/fs/xfs/xfs_aops.c > +++ b/fs/xfs/xfs_aops.c ... > @@ -645,13 +653,16 @@ xfs_check_page_type( > bh = head = page_buffers(page); > do { > if (buffer_unwritten(bh)) { > - if (type == XFS_IO_UNWRITTEN) > + if (type == XFS_IO_UNWRITTEN || > + type == XFS_IO_COW) > return true; > } else if (buffer_delay(bh)) { > - if (type == XFS_IO_DELALLOC) > + if (type == XFS_IO_DELALLOC || > + type == XFS_IO_COW) > return true; > } else if (buffer_dirty(bh) && buffer_mapped(bh)) { > - if (type == XFS_IO_OVERWRITE) > + if (type == XFS_IO_OVERWRITE || > + type == XFS_IO_COW) > return true; > } What's the purpose of this hunk? As it is, we don't appear to have any non-XFS_IO_DELALLOC callers. This probably warrants an update to the top-of-function comment at the very least. Brian > > @@ -739,6 +750,56 @@ xfs_aops_discard_page( > return; > } > > +static int > +xfs_map_cow( > + struct xfs_writepage_ctx *wpc, > + struct inode *inode, > + loff_t offset, > + unsigned int *new_type) > +{ > + struct xfs_inode *ip = XFS_I(inode); > + struct xfs_bmbt_irec imap; > + bool is_cow = false, need_alloc = false; > + int error; > + > + /* > + * If we already have a valid COW mapping keep using it. > + */ > + if (wpc->io_type == XFS_IO_COW) { > + wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap, offset); > + if (wpc->imap_valid) { > + *new_type = XFS_IO_COW; > + return 0; > + } > + } > + > + /* > + * Else we need to check if there is a COW mapping at this offset. > + */ > + xfs_ilock(ip, XFS_ILOCK_SHARED); > + is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap, &need_alloc); > + xfs_iunlock(ip, XFS_ILOCK_SHARED); > + > + if (!is_cow) > + return 0; > + > + /* > + * And if the COW mapping has a delayed extent here we need to > + * allocate real space for it now. > + */ > + if (need_alloc) { > + error = xfs_iomap_write_allocate(ip, XFS_COW_FORK, offset, > + &imap); > + if (error) > + return error; > + } > + > + wpc->io_type = *new_type = XFS_IO_COW; > + wpc->imap_valid = true; > + wpc->imap = imap; > + return 0; > +} > + > /* > * We implement an immediate ioend submission policy here to avoid needing to > * chain multiple ioends and hence nest mempool allocations which can violate > @@ -771,6 +832,7 @@ xfs_writepage_map( > int error = 0; > int count = 0; > int uptodate = 1; > + unsigned int new_type; > > bh = head = page_buffers(page); > offset = page_offset(page); > @@ -791,22 +853,13 @@ xfs_writepage_map( > continue; > } > > - if (buffer_unwritten(bh)) { > - if (wpc->io_type != XFS_IO_UNWRITTEN) { > - wpc->io_type = XFS_IO_UNWRITTEN; > - wpc->imap_valid = false; > - } > - } else if (buffer_delay(bh)) { > - if (wpc->io_type != XFS_IO_DELALLOC) { > - wpc->io_type = XFS_IO_DELALLOC; > - wpc->imap_valid = false; > - } > - } else if (buffer_uptodate(bh)) { > - if (wpc->io_type != XFS_IO_OVERWRITE) { > - wpc->io_type = XFS_IO_OVERWRITE; > - wpc->imap_valid = false; > - } > - } else { > + if (buffer_unwritten(bh)) > + new_type = XFS_IO_UNWRITTEN; > + else if (buffer_delay(bh)) > + new_type = XFS_IO_DELALLOC; > + else if (buffer_uptodate(bh)) > + new_type = XFS_IO_OVERWRITE; > + else { > if (PageUptodate(page)) > ASSERT(buffer_mapped(bh)); > /* > @@ -819,6 +872,17 @@ xfs_writepage_map( > continue; > } > > + if (xfs_is_reflink_inode(XFS_I(inode))) { > + error = xfs_map_cow(wpc, inode, offset, &new_type); > + if (error) > + goto out; > + } > + > + if (wpc->io_type != new_type) { > + wpc->io_type = new_type; > + wpc->imap_valid = false; > + } > + > if (wpc->imap_valid) > wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap, > offset); > diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h > index 1950e3b..b3c6634 100644 > --- a/fs/xfs/xfs_aops.h > +++ b/fs/xfs/xfs_aops.h > @@ -28,13 +28,15 @@ enum { > XFS_IO_DELALLOC, /* covers delalloc region */ > XFS_IO_UNWRITTEN, /* covers allocated but uninitialized data */ > XFS_IO_OVERWRITE, /* covers already allocated extent */ > + XFS_IO_COW, /* covers copy-on-write extent */ > }; > > #define XFS_IO_TYPES \ > { XFS_IO_INVALID, "invalid" }, \ > { XFS_IO_DELALLOC, "delalloc" }, \ > { XFS_IO_UNWRITTEN, "unwritten" }, \ > - { XFS_IO_OVERWRITE, "overwrite" } > + { XFS_IO_OVERWRITE, "overwrite" }, \ > + { XFS_IO_COW, "CoW" } > > /* > * Structure for buffered I/O completions. > diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c > index 05a7fe6..e8c7c85 100644 > --- a/fs/xfs/xfs_reflink.c > +++ b/fs/xfs/xfs_reflink.c > @@ -314,3 +314,89 @@ xfs_reflink_reserve_cow_range( > > return error; > } > + > +/* > + * Find the CoW reservation (and whether or not it needs block allocation) > + * for a given byte offset of a file. > + */ > +bool > +xfs_reflink_find_cow_mapping( > + struct xfs_inode *ip, > + xfs_off_t offset, > + struct xfs_bmbt_irec *imap, > + bool *need_alloc) > +{ > + struct xfs_bmbt_irec irec; > + struct xfs_ifork *ifp; > + struct xfs_bmbt_rec_host *gotp; > + xfs_fileoff_t bno; > + xfs_extnum_t idx; > + > + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)); > + > + if (!xfs_is_reflink_inode(ip)) > + return false; > + > + /* Find the extent in the CoW fork. */ > + ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); > + bno = XFS_B_TO_FSBT(ip->i_mount, offset); > + gotp = xfs_iext_bno_to_ext(ifp, bno, &idx); > + if (!gotp) > + return false; > + > + xfs_bmbt_get_all(gotp, &irec); > + if (bno >= irec.br_startoff + irec.br_blockcount || > + bno < irec.br_startoff) > + return false; > + > + trace_xfs_reflink_find_cow_mapping(ip, offset, 1, XFS_IO_OVERWRITE, > + &irec); > + > + /* If it's still delalloc, we must allocate later. */ > + *imap = irec; > + *need_alloc = !!(isnullstartblock(irec.br_startblock)); > + > + return true; > +} > + > +/* > + * Trim an extent to end at the next CoW reservation past offset_fsb. > + */ > +int > +xfs_reflink_trim_irec_to_next_cow( > + struct xfs_inode *ip, > + xfs_fileoff_t offset_fsb, > + struct xfs_bmbt_irec *imap) > +{ > + struct xfs_bmbt_irec irec; > + struct xfs_ifork *ifp; > + struct xfs_bmbt_rec_host *gotp; > + xfs_extnum_t idx; > + > + if (!xfs_is_reflink_inode(ip)) > + return 0; > + > + /* Find the extent in the CoW fork. */ > + ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); > + gotp = xfs_iext_bno_to_ext(ifp, offset_fsb, &idx); > + if (!gotp) > + return 0; > + xfs_bmbt_get_all(gotp, &irec); > + > + /* This is the extent before; try sliding up one. */ > + if (irec.br_startoff < offset_fsb) { > + idx++; > + if (idx >= ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) > + return 0; > + gotp = xfs_iext_get_ext(ifp, idx); > + xfs_bmbt_get_all(gotp, &irec); > + } > + > + if (irec.br_startoff >= imap->br_startoff + imap->br_blockcount) > + return 0; > + > + imap->br_blockcount = irec.br_startoff - imap->br_startoff; > + trace_xfs_reflink_trim_irec(ip, imap); > + > + return 0; > +} > diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h > index f824f87..11408c0 100644 > --- a/fs/xfs/xfs_reflink.h > +++ b/fs/xfs/xfs_reflink.h > @@ -28,5 +28,9 @@ extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip, > > extern int xfs_reflink_reserve_cow_range(struct xfs_inode *ip, > xfs_off_t offset, xfs_off_t count); > +extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset, > + struct xfs_bmbt_irec *imap, bool *need_alloc); > +extern int xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip, > + xfs_fileoff_t offset_fsb, struct xfs_bmbt_irec *imap); > > #endif /* __XFS_REFLINK_H */ > > -- > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html