On Tue, Oct 04, 2016 at 11:26:37AM -0700, Darrick J. Wong wrote: > On Tue, Oct 04, 2016 at 12:38:40PM -0400, Brian Foster wrote: > > On Thu, Sep 29, 2016 at 08:09:14PM -0700, Darrick J. Wong wrote: > > > Modify the writepage handler to find and convert pending delalloc > > > extents to real allocations. Furthermore, when we're doing non-cow > > > writes to a part of a file that already has a CoW reservation (the > > > cowextsz hint that we set up in a subsequent patch facilitates this), > > > promote the write to copy-on-write so that the entire extent can get > > > written out as a single extent on disk, thereby reducing post-CoW > > > fragmentation. > > > > > > Christoph moved the CoW support code in _map_blocks to a separate helper > > > function, refactored other functions, and reduced the number of CoW fork > > > lookups, so I merged those changes here to reduce churn. > > > > > > Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx> > > > Signed-off-by: Christoph Hellwig <hch@xxxxxx> > > > --- > > > fs/xfs/xfs_aops.c | 106 ++++++++++++++++++++++++++++++++++++++++---------- > > > fs/xfs/xfs_aops.h | 4 +- > > > fs/xfs/xfs_reflink.c | 86 +++++++++++++++++++++++++++++++++++++++++ > > > fs/xfs/xfs_reflink.h | 4 ++ > > > 4 files changed, 178 insertions(+), 22 deletions(-) > > > > > > > > > diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c > > > index 007a520..7b1e9de 100644 > > > --- a/fs/xfs/xfs_aops.c > > > +++ b/fs/xfs/xfs_aops.c > > ... > > > @@ -645,13 +653,16 @@ xfs_check_page_type( > > > bh = head = page_buffers(page); > > > do { > > > if (buffer_unwritten(bh)) { > > > - if (type == XFS_IO_UNWRITTEN) > > > + if (type == XFS_IO_UNWRITTEN || > > > + type == XFS_IO_COW) > > > return true; > > > } else if (buffer_delay(bh)) { > > > - if (type == XFS_IO_DELALLOC) > > > + if (type == XFS_IO_DELALLOC || > > > + type == XFS_IO_COW) > > > return true; > > > } else if (buffer_dirty(bh) && buffer_mapped(bh)) { > > > - if (type == XFS_IO_OVERWRITE) > > > + if (type == XFS_IO_OVERWRITE || > > > + type == XFS_IO_COW) > > > return true; > > > } > > > > What's the purpose of this hunk? As it is, we don't appear to have any > > non-XFS_IO_DELALLOC callers. This probably warrants an update to the > > top-of-function comment at the very least. > > Hmmmmm, originally these hunks /did/ actually help us to promote any > write that also had a CoW fork reservation into a copy-on-write to > reduce fragmentation, but with the iomap rework I think I can drop > this hunk since there's only one caller of this function now. > Ah, right. That makes sense. Thanks. Brian > --D > > > > > Brian > > > > > > > > @@ -739,6 +750,56 @@ xfs_aops_discard_page( > > > return; > > > } > > > > > > +static int > > > +xfs_map_cow( > > > + struct xfs_writepage_ctx *wpc, > > > + struct inode *inode, > > > + loff_t offset, > > > + unsigned int *new_type) > > > +{ > > > + struct xfs_inode *ip = XFS_I(inode); > > > + struct xfs_bmbt_irec imap; > > > + bool is_cow = false, need_alloc = false; > > > + int error; > > > + > > > + /* > > > + * If we already have a valid COW mapping keep using it. > > > + */ > > > + if (wpc->io_type == XFS_IO_COW) { > > > + wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap, offset); > > > + if (wpc->imap_valid) { > > > + *new_type = XFS_IO_COW; > > > + return 0; > > > + } > > > + } > > > + > > > + /* > > > + * Else we need to check if there is a COW mapping at this offset. > > > + */ > > > + xfs_ilock(ip, XFS_ILOCK_SHARED); > > > + is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap, &need_alloc); > > > + xfs_iunlock(ip, XFS_ILOCK_SHARED); > > > + > > > + if (!is_cow) > > > + return 0; > > > + > > > + /* > > > + * And if the COW mapping has a delayed extent here we need to > > > + * allocate real space for it now. > > > + */ > > > + if (need_alloc) { > > > + error = xfs_iomap_write_allocate(ip, XFS_COW_FORK, offset, > > > + &imap); > > > + if (error) > > > + return error; > > > + } > > > + > > > + wpc->io_type = *new_type = XFS_IO_COW; > > > + wpc->imap_valid = true; > > > + wpc->imap = imap; > > > + return 0; > > > +} > > > + > > > /* > > > * We implement an immediate ioend submission policy here to avoid needing to > > > * chain multiple ioends and hence nest mempool allocations which can violate > > > @@ -771,6 +832,7 @@ xfs_writepage_map( > > > int error = 0; > > > int count = 0; > > > int uptodate = 1; > > > + unsigned int new_type; > > > > > > bh = head = page_buffers(page); > > > offset = page_offset(page); > > > @@ -791,22 +853,13 @@ xfs_writepage_map( > > > continue; > > > } > > > > > > - if (buffer_unwritten(bh)) { > > > - if (wpc->io_type != XFS_IO_UNWRITTEN) { > > > - wpc->io_type = XFS_IO_UNWRITTEN; > > > - wpc->imap_valid = false; > > > - } > > > - } else if (buffer_delay(bh)) { > > > - if (wpc->io_type != XFS_IO_DELALLOC) { > > > - wpc->io_type = XFS_IO_DELALLOC; > > > - wpc->imap_valid = false; > > > - } > > > - } else if (buffer_uptodate(bh)) { > > > - if (wpc->io_type != XFS_IO_OVERWRITE) { > > > - wpc->io_type = XFS_IO_OVERWRITE; > > > - wpc->imap_valid = false; > > > - } > > > - } else { > > > + if (buffer_unwritten(bh)) > > > + new_type = XFS_IO_UNWRITTEN; > > > + else if (buffer_delay(bh)) > > > + new_type = XFS_IO_DELALLOC; > > > + else if (buffer_uptodate(bh)) > > > + new_type = XFS_IO_OVERWRITE; > > > + else { > > > if (PageUptodate(page)) > > > ASSERT(buffer_mapped(bh)); > > > /* > > > @@ -819,6 +872,17 @@ xfs_writepage_map( > > > continue; > > > } > > > > > > + if (xfs_is_reflink_inode(XFS_I(inode))) { > > > + error = xfs_map_cow(wpc, inode, offset, &new_type); > > > + if (error) > > > + goto out; > > > + } > > > + > > > + if (wpc->io_type != new_type) { > > > + wpc->io_type = new_type; > > > + wpc->imap_valid = false; > > > + } > > > + > > > if (wpc->imap_valid) > > > wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap, > > > offset); > > > diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h > > > index 1950e3b..b3c6634 100644 > > > --- a/fs/xfs/xfs_aops.h > > > +++ b/fs/xfs/xfs_aops.h > > > @@ -28,13 +28,15 @@ enum { > > > XFS_IO_DELALLOC, /* covers delalloc region */ > > > XFS_IO_UNWRITTEN, /* covers allocated but uninitialized data */ > > > XFS_IO_OVERWRITE, /* covers already allocated extent */ > > > + XFS_IO_COW, /* covers copy-on-write extent */ > > > }; > > > > > > #define XFS_IO_TYPES \ > > > { XFS_IO_INVALID, "invalid" }, \ > > > { XFS_IO_DELALLOC, "delalloc" }, \ > > > { XFS_IO_UNWRITTEN, "unwritten" }, \ > > > - { XFS_IO_OVERWRITE, "overwrite" } > > > + { XFS_IO_OVERWRITE, "overwrite" }, \ > > > + { XFS_IO_COW, "CoW" } > > > > > > /* > > > * Structure for buffered I/O completions. > > > diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c > > > index 05a7fe6..e8c7c85 100644 > > > --- a/fs/xfs/xfs_reflink.c > > > +++ b/fs/xfs/xfs_reflink.c > > > @@ -314,3 +314,89 @@ xfs_reflink_reserve_cow_range( > > > > > > return error; > > > } > > > + > > > +/* > > > + * Find the CoW reservation (and whether or not it needs block allocation) > > > + * for a given byte offset of a file. > > > + */ > > > +bool > > > +xfs_reflink_find_cow_mapping( > > > + struct xfs_inode *ip, > > > + xfs_off_t offset, > > > + struct xfs_bmbt_irec *imap, > > > + bool *need_alloc) > > > +{ > > > + struct xfs_bmbt_irec irec; > > > + struct xfs_ifork *ifp; > > > + struct xfs_bmbt_rec_host *gotp; > > > + xfs_fileoff_t bno; > > > + xfs_extnum_t idx; > > > + > > > + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)); > > > + > > > + if (!xfs_is_reflink_inode(ip)) > > > + return false; > > > + > > > + /* Find the extent in the CoW fork. */ > > > + ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); > > > + bno = XFS_B_TO_FSBT(ip->i_mount, offset); > > > + gotp = xfs_iext_bno_to_ext(ifp, bno, &idx); > > > + if (!gotp) > > > + return false; > > > + > > > + xfs_bmbt_get_all(gotp, &irec); > > > + if (bno >= irec.br_startoff + irec.br_blockcount || > > > + bno < irec.br_startoff) > > > + return false; > > > + > > > + trace_xfs_reflink_find_cow_mapping(ip, offset, 1, XFS_IO_OVERWRITE, > > > + &irec); > > > + > > > + /* If it's still delalloc, we must allocate later. */ > > > + *imap = irec; > > > + *need_alloc = !!(isnullstartblock(irec.br_startblock)); > > > + > > > + return true; > > > +} > > > + > > > +/* > > > + * Trim an extent to end at the next CoW reservation past offset_fsb. > > > + */ > > > +int > > > +xfs_reflink_trim_irec_to_next_cow( > > > + struct xfs_inode *ip, > > > + xfs_fileoff_t offset_fsb, > > > + struct xfs_bmbt_irec *imap) > > > +{ > > > + struct xfs_bmbt_irec irec; > > > + struct xfs_ifork *ifp; > > > + struct xfs_bmbt_rec_host *gotp; > > > + xfs_extnum_t idx; > > > + > > > + if (!xfs_is_reflink_inode(ip)) > > > + return 0; > > > + > > > + /* Find the extent in the CoW fork. */ > > > + ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); > > > + gotp = xfs_iext_bno_to_ext(ifp, offset_fsb, &idx); > > > + if (!gotp) > > > + return 0; > > > + xfs_bmbt_get_all(gotp, &irec); > > > + > > > + /* This is the extent before; try sliding up one. */ > > > + if (irec.br_startoff < offset_fsb) { > > > + idx++; > > > + if (idx >= ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) > > > + return 0; > > > + gotp = xfs_iext_get_ext(ifp, idx); > > > + xfs_bmbt_get_all(gotp, &irec); > > > + } > > > + > > > + if (irec.br_startoff >= imap->br_startoff + imap->br_blockcount) > > > + return 0; > > > + > > > + imap->br_blockcount = irec.br_startoff - imap->br_startoff; > > > + trace_xfs_reflink_trim_irec(ip, imap); > > > + > > > + return 0; > > > +} > > > diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h > > > index f824f87..11408c0 100644 > > > --- a/fs/xfs/xfs_reflink.h > > > +++ b/fs/xfs/xfs_reflink.h > > > @@ -28,5 +28,9 @@ extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip, > > > > > > extern int xfs_reflink_reserve_cow_range(struct xfs_inode *ip, > > > xfs_off_t offset, xfs_off_t count); > > > +extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset, > > > + struct xfs_bmbt_irec *imap, bool *need_alloc); > > > +extern int xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip, > > > + xfs_fileoff_t offset_fsb, struct xfs_bmbt_irec *imap); > > > > > > #endif /* __XFS_REFLINK_H */ > > > > > > -- > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > > > the body of a message to majordomo@xxxxxxxxxxxxxxx > > > More majordomo info at http://vger.kernel.org/majordomo-info.html > -- > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html