Re: [PATCH 33/63] xfs: allocate delayed extents in CoW fork

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Oct 04, 2016 at 12:38:40PM -0400, Brian Foster wrote:
> On Thu, Sep 29, 2016 at 08:09:14PM -0700, Darrick J. Wong wrote:
> > Modify the writepage handler to find and convert pending delalloc
> > extents to real allocations.  Furthermore, when we're doing non-cow
> > writes to a part of a file that already has a CoW reservation (the
> > cowextsz hint that we set up in a subsequent patch facilitates this),
> > promote the write to copy-on-write so that the entire extent can get
> > written out as a single extent on disk, thereby reducing post-CoW
> > fragmentation.
> > 
> > Christoph moved the CoW support code in _map_blocks to a separate helper
> > function, refactored other functions, and reduced the number of CoW fork
> > lookups, so I merged those changes here to reduce churn.
> > 
> > Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
> > Signed-off-by: Christoph Hellwig <hch@xxxxxx>
> > ---
> >  fs/xfs/xfs_aops.c    |  106 ++++++++++++++++++++++++++++++++++++++++----------
> >  fs/xfs/xfs_aops.h    |    4 +-
> >  fs/xfs/xfs_reflink.c |   86 +++++++++++++++++++++++++++++++++++++++++
> >  fs/xfs/xfs_reflink.h |    4 ++
> >  4 files changed, 178 insertions(+), 22 deletions(-)
> > 
> > 
> > diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
> > index 007a520..7b1e9de 100644
> > --- a/fs/xfs/xfs_aops.c
> > +++ b/fs/xfs/xfs_aops.c
> ...
> > @@ -645,13 +653,16 @@ xfs_check_page_type(
> >  	bh = head = page_buffers(page);
> >  	do {
> >  		if (buffer_unwritten(bh)) {
> > -			if (type == XFS_IO_UNWRITTEN)
> > +			if (type == XFS_IO_UNWRITTEN ||
> > +			    type == XFS_IO_COW)
> >  				return true;
> >  		} else if (buffer_delay(bh)) {
> > -			if (type == XFS_IO_DELALLOC)
> > +			if (type == XFS_IO_DELALLOC ||
> > +			    type == XFS_IO_COW)
> >  				return true;
> >  		} else if (buffer_dirty(bh) && buffer_mapped(bh)) {
> > -			if (type == XFS_IO_OVERWRITE)
> > +			if (type == XFS_IO_OVERWRITE ||
> > +			    type == XFS_IO_COW)
> >  				return true;
> >  		}
> 
> What's the purpose of this hunk? As it is, we don't appear to have any
> non-XFS_IO_DELALLOC callers. This probably warrants an update to the
> top-of-function comment at the very least.

Hmmmmm, originally these hunks /did/ actually help us to promote any
write that also had a CoW fork reservation into a copy-on-write to
reduce fragmentation, but with the iomap rework I think I can drop
this hunk since there's only one caller of this function now.

--D

> 
> Brian
> 
> >  
> > @@ -739,6 +750,56 @@ xfs_aops_discard_page(
> >  	return;
> >  }
> >  
> > +static int
> > +xfs_map_cow(
> > +	struct xfs_writepage_ctx *wpc,
> > +	struct inode		*inode,
> > +	loff_t			offset,
> > +	unsigned int		*new_type)
> > +{
> > +	struct xfs_inode	*ip = XFS_I(inode);
> > +	struct xfs_bmbt_irec	imap;
> > +	bool			is_cow = false, need_alloc = false;
> > +	int			error;
> > +
> > +	/*
> > +	 * If we already have a valid COW mapping keep using it.
> > +	 */
> > +	if (wpc->io_type == XFS_IO_COW) {
> > +		wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap, offset);
> > +		if (wpc->imap_valid) {
> > +			*new_type = XFS_IO_COW;
> > +			return 0;
> > +		}
> > +	}
> > +
> > +	/*
> > +	 * Else we need to check if there is a COW mapping at this offset.
> > +	 */
> > +	xfs_ilock(ip, XFS_ILOCK_SHARED);
> > +	is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap, &need_alloc);
> > +	xfs_iunlock(ip, XFS_ILOCK_SHARED);
> > +
> > +	if (!is_cow)
> > +		return 0;
> > +
> > +	/*
> > +	 * And if the COW mapping has a delayed extent here we need to
> > +	 * allocate real space for it now.
> > +	 */
> > +	if (need_alloc) {
> > +		error = xfs_iomap_write_allocate(ip, XFS_COW_FORK, offset,
> > +				&imap);
> > +		if (error)
> > +			return error;
> > +	}
> > +
> > +	wpc->io_type = *new_type = XFS_IO_COW;
> > +	wpc->imap_valid = true;
> > +	wpc->imap = imap;
> > +	return 0;
> > +}
> > +
> >  /*
> >   * We implement an immediate ioend submission policy here to avoid needing to
> >   * chain multiple ioends and hence nest mempool allocations which can violate
> > @@ -771,6 +832,7 @@ xfs_writepage_map(
> >  	int			error = 0;
> >  	int			count = 0;
> >  	int			uptodate = 1;
> > +	unsigned int		new_type;
> >  
> >  	bh = head = page_buffers(page);
> >  	offset = page_offset(page);
> > @@ -791,22 +853,13 @@ xfs_writepage_map(
> >  			continue;
> >  		}
> >  
> > -		if (buffer_unwritten(bh)) {
> > -			if (wpc->io_type != XFS_IO_UNWRITTEN) {
> > -				wpc->io_type = XFS_IO_UNWRITTEN;
> > -				wpc->imap_valid = false;
> > -			}
> > -		} else if (buffer_delay(bh)) {
> > -			if (wpc->io_type != XFS_IO_DELALLOC) {
> > -				wpc->io_type = XFS_IO_DELALLOC;
> > -				wpc->imap_valid = false;
> > -			}
> > -		} else if (buffer_uptodate(bh)) {
> > -			if (wpc->io_type != XFS_IO_OVERWRITE) {
> > -				wpc->io_type = XFS_IO_OVERWRITE;
> > -				wpc->imap_valid = false;
> > -			}
> > -		} else {
> > +		if (buffer_unwritten(bh))
> > +			new_type = XFS_IO_UNWRITTEN;
> > +		else if (buffer_delay(bh))
> > +			new_type = XFS_IO_DELALLOC;
> > +		else if (buffer_uptodate(bh))
> > +			new_type = XFS_IO_OVERWRITE;
> > +		else {
> >  			if (PageUptodate(page))
> >  				ASSERT(buffer_mapped(bh));
> >  			/*
> > @@ -819,6 +872,17 @@ xfs_writepage_map(
> >  			continue;
> >  		}
> >  
> > +		if (xfs_is_reflink_inode(XFS_I(inode))) {
> > +			error = xfs_map_cow(wpc, inode, offset, &new_type);
> > +			if (error)
> > +				goto out;
> > +		}
> > +
> > +		if (wpc->io_type != new_type) {
> > +			wpc->io_type = new_type;
> > +			wpc->imap_valid = false;
> > +		}
> > +
> >  		if (wpc->imap_valid)
> >  			wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
> >  							 offset);
> > diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
> > index 1950e3b..b3c6634 100644
> > --- a/fs/xfs/xfs_aops.h
> > +++ b/fs/xfs/xfs_aops.h
> > @@ -28,13 +28,15 @@ enum {
> >  	XFS_IO_DELALLOC,	/* covers delalloc region */
> >  	XFS_IO_UNWRITTEN,	/* covers allocated but uninitialized data */
> >  	XFS_IO_OVERWRITE,	/* covers already allocated extent */
> > +	XFS_IO_COW,		/* covers copy-on-write extent */
> >  };
> >  
> >  #define XFS_IO_TYPES \
> >  	{ XFS_IO_INVALID,		"invalid" }, \
> >  	{ XFS_IO_DELALLOC,		"delalloc" }, \
> >  	{ XFS_IO_UNWRITTEN,		"unwritten" }, \
> > -	{ XFS_IO_OVERWRITE,		"overwrite" }
> > +	{ XFS_IO_OVERWRITE,		"overwrite" }, \
> > +	{ XFS_IO_COW,			"CoW" }
> >  
> >  /*
> >   * Structure for buffered I/O completions.
> > diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
> > index 05a7fe6..e8c7c85 100644
> > --- a/fs/xfs/xfs_reflink.c
> > +++ b/fs/xfs/xfs_reflink.c
> > @@ -314,3 +314,89 @@ xfs_reflink_reserve_cow_range(
> >  
> >  	return error;
> >  }
> > +
> > +/*
> > + * Find the CoW reservation (and whether or not it needs block allocation)
> > + * for a given byte offset of a file.
> > + */
> > +bool
> > +xfs_reflink_find_cow_mapping(
> > +	struct xfs_inode		*ip,
> > +	xfs_off_t			offset,
> > +	struct xfs_bmbt_irec		*imap,
> > +	bool				*need_alloc)
> > +{
> > +	struct xfs_bmbt_irec		irec;
> > +	struct xfs_ifork		*ifp;
> > +	struct xfs_bmbt_rec_host	*gotp;
> > +	xfs_fileoff_t			bno;
> > +	xfs_extnum_t			idx;
> > +
> > +	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
> > +
> > +	if (!xfs_is_reflink_inode(ip))
> > +		return false;
> > +
> > +	/* Find the extent in the CoW fork. */
> > +	ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
> > +	bno = XFS_B_TO_FSBT(ip->i_mount, offset);
> > +	gotp = xfs_iext_bno_to_ext(ifp, bno, &idx);
> > +	if (!gotp)
> > +		return false;
> > +
> > +	xfs_bmbt_get_all(gotp, &irec);
> > +	if (bno >= irec.br_startoff + irec.br_blockcount ||
> > +	    bno < irec.br_startoff)
> > +		return false;
> > +
> > +	trace_xfs_reflink_find_cow_mapping(ip, offset, 1, XFS_IO_OVERWRITE,
> > +			&irec);
> > +
> > +	/* If it's still delalloc, we must allocate later. */
> > +	*imap = irec;
> > +	*need_alloc = !!(isnullstartblock(irec.br_startblock));
> > +
> > +	return true;
> > +}
> > +
> > +/*
> > + * Trim an extent to end at the next CoW reservation past offset_fsb.
> > + */
> > +int
> > +xfs_reflink_trim_irec_to_next_cow(
> > +	struct xfs_inode		*ip,
> > +	xfs_fileoff_t			offset_fsb,
> > +	struct xfs_bmbt_irec		*imap)
> > +{
> > +	struct xfs_bmbt_irec		irec;
> > +	struct xfs_ifork		*ifp;
> > +	struct xfs_bmbt_rec_host	*gotp;
> > +	xfs_extnum_t			idx;
> > +
> > +	if (!xfs_is_reflink_inode(ip))
> > +		return 0;
> > +
> > +	/* Find the extent in the CoW fork. */
> > +	ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
> > +	gotp = xfs_iext_bno_to_ext(ifp, offset_fsb, &idx);
> > +	if (!gotp)
> > +		return 0;
> > +	xfs_bmbt_get_all(gotp, &irec);
> > +
> > +	/* This is the extent before; try sliding up one. */
> > +	if (irec.br_startoff < offset_fsb) {
> > +		idx++;
> > +		if (idx >= ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
> > +			return 0;
> > +		gotp = xfs_iext_get_ext(ifp, idx);
> > +		xfs_bmbt_get_all(gotp, &irec);
> > +	}
> > +
> > +	if (irec.br_startoff >= imap->br_startoff + imap->br_blockcount)
> > +		return 0;
> > +
> > +	imap->br_blockcount = irec.br_startoff - imap->br_startoff;
> > +	trace_xfs_reflink_trim_irec(ip, imap);
> > +
> > +	return 0;
> > +}
> > diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
> > index f824f87..11408c0 100644
> > --- a/fs/xfs/xfs_reflink.h
> > +++ b/fs/xfs/xfs_reflink.h
> > @@ -28,5 +28,9 @@ extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip,
> >  
> >  extern int xfs_reflink_reserve_cow_range(struct xfs_inode *ip,
> >  		xfs_off_t offset, xfs_off_t count);
> > +extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset,
> > +		struct xfs_bmbt_irec *imap, bool *need_alloc);
> > +extern int xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip,
> > +		xfs_fileoff_t offset_fsb, struct xfs_bmbt_irec *imap);
> >  
> >  #endif /* __XFS_REFLINK_H */
> > 
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > the body of a message to majordomo@xxxxxxxxxxxxxxx
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [XFS Filesystem Development (older mail)]     [Linux Filesystem Development]     [Linux Audio Users]     [Yosemite Trails]     [Linux Kernel]     [Linux RAID]     [Linux SCSI]


  Powered by Linux