Re: [PATCH 08/11] xfs: merge COW handling into xfs_file_iomap_begin_delay

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Dec 19, 2018 at 08:38:43PM +0100, Christoph Hellwig wrote:
> On Tue, Dec 18, 2018 at 03:36:41PM -0800, Darrick J. Wong wrote:
> > On Mon, Dec 03, 2018 at 05:25:00PM -0500, Christoph Hellwig wrote:
> > > Besides simplifying the code a bit this allows to actually implement
> > > the behavior of using COW preallocation for non-COW data mentioned
> > > in the current comments.
> > > 
> > > Note that this breaks the current version of xfs/420, but that is
> > > because the test is broken.  A separate fix will be sent for it.
> > 
> > (Still waiting for this...)
> 
> It was sent quite a while ago, but I need to resend it..
> 
> > 
> > > Signed-off-by: Christoph Hellwig <hch@xxxxxx>
> > > ---
> > >  fs/xfs/xfs_iomap.c   | 132 ++++++++++++++++++++++++++++++-------------
> > >  fs/xfs/xfs_reflink.c |  67 ----------------------
> > >  fs/xfs/xfs_trace.h   |   1 -
> > >  3 files changed, 93 insertions(+), 107 deletions(-)
> > > 
> > > diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> > > index d851abac16a9..d19f99e5476a 100644
> > > --- a/fs/xfs/xfs_iomap.c
> > > +++ b/fs/xfs/xfs_iomap.c
> > > @@ -534,15 +534,16 @@ xfs_file_iomap_begin_delay(
> > >  {
> > >  	struct xfs_inode	*ip = XFS_I(inode);
> > >  	struct xfs_mount	*mp = ip->i_mount;
> > > -	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
> > >  	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
> > >  	xfs_fileoff_t		maxbytes_fsb =
> > >  		XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
> > >  	xfs_fileoff_t		end_fsb;
> > > -	int			error = 0, eof = 0;
> > > -	struct xfs_bmbt_irec	got;
> > > -	struct xfs_iext_cursor	icur;
> > > +	struct xfs_bmbt_irec	imap, cmap;
> > > +	struct xfs_iext_cursor	icur, ccur;
> > >  	xfs_fsblock_t		prealloc_blocks = 0;
> > > +	bool			eof = false, cow_eof = false, shared;
> > > +	int			whichfork = XFS_DATA_FORK;
> > > +	int			error = 0;
> > >  
> > >  	ASSERT(!XFS_IS_REALTIME_INODE(ip));
> > >  	ASSERT(!xfs_get_extsz_hint(ip));
> > > @@ -560,7 +561,7 @@ xfs_file_iomap_begin_delay(
> > >  
> > >  	XFS_STATS_INC(mp, xs_blk_mapw);
> > >  
> > > -	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
> > > +	if (!(ip->i_df.if_flags & XFS_IFEXTENTS)) {
> > >  		error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
> > >  		if (error)
> > >  			goto out_unlock;
> > > @@ -568,51 +569,92 @@ xfs_file_iomap_begin_delay(
> > >  
> > >  	end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb);
> > >  
> > > -	eof = !xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got);
> > > +	/*
> > > +	 * Search the data fork fork first to look up our source mapping.  We
> > > +	 * always need the data fork map, as we have to return it to the
> > > +	 * iomap code so that the higher level write code can read data in to
> > > +	 * perform read-modify-write cycles for unaligned writes.
> > > +	 */
> > > +	eof = !xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap);
> > >  	if (eof)
> > > -		got.br_startoff = end_fsb; /* fake hole until the end */
> > > +		imap.br_startoff = end_fsb; /* fake hole until the end */
> > >  
> > > -	if (got.br_startoff <= offset_fsb) {
> > > +	/* We never need to allocate blocks for zeroing a hole. */
> > > +	if ((flags & IOMAP_ZERO) && imap.br_startoff > offset_fsb) {
> > > +		xfs_hole_to_iomap(ip, iomap, offset_fsb, imap.br_startoff);
> > > +		goto out_unlock;
> > > +	}
> > > +
> > > +	/*
> > > +	 * Search the COW fork extent list even if we did not find a data fork
> > > +	 * extent.  This serves two purposes: first this implements the
> > > +	 * speculative preallocation using cowextisze, so that we also unshare
> > 
> > "cowextsize"
> > 
> > > +	 * block adjacent to shared blocks instead of just the shared blocks
> > > +	 * themselves.  Second the lookup in the extent list is generally faster
> > > +	 * than going out to the shared extent tree.
> > > +	 */
> > > +	if (xfs_is_reflink_inode(ip)) {
> > > +		cow_eof = !xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb,
> > > +				&ccur, &cmap);
> > > +		if (!cow_eof && cmap.br_startoff <= offset_fsb) {
> > > +			trace_xfs_reflink_cow_found(ip, &cmap);
> > > +			whichfork = XFS_COW_FORK;
> > > +			goto done;
> > > +		}
> > > +	}
> > > +
> > > +	if (imap.br_startoff <= offset_fsb) {
> > >  		/*
> > >  		 * For reflink files we may need a delalloc reservation when
> > >  		 * overwriting shared extents.   This includes zeroing of
> > >  		 * existing extents that contain data.
> > >  		 */
> > > -		if (xfs_is_reflink_inode(ip) &&
> > > -		    ((flags & IOMAP_WRITE) ||
> > > -		     got.br_state != XFS_EXT_UNWRITTEN)) {
> > > -			xfs_trim_extent(&got, offset_fsb, end_fsb - offset_fsb);
> > > -			error = xfs_reflink_reserve_cow(ip, &got);
> > > -			if (error)
> > > -				goto out_unlock;
> > > +		if (!xfs_is_reflink_inode(ip) ||
> > > +		    ((flags & IOMAP_ZERO) && imap.br_state != XFS_EXT_NORM)) {
> > > +			trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK,
> > > +					&imap);
> > > +			goto done;
> > >  		}
> > >  
> > > -		trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK, &got);
> > > -		goto done;
> > > -	}
> > > +		xfs_trim_extent(&imap, offset_fsb, end_fsb - offset_fsb);
> > >  
> > > -	if (flags & IOMAP_ZERO) {
> > > -		xfs_hole_to_iomap(ip, iomap, offset_fsb, got.br_startoff);
> > > -		goto out_unlock;
> > > +		/* Trim the mapping to the nearest shared extent boundary. */
> > > +		error = xfs_reflink_trim_around_shared(ip, &imap, &shared);
> > > +		if (error)
> > > +			goto out_unlock;
> > > +
> > > +		/* Not shared?  Just report the (potentially capped) extent. */
> > > +		if (!shared) {
> > > +			trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK,
> > > +					&imap);
> > > +			goto done;
> > > +		}
> > > +
> > > +		/*
> > > +		 * Fork all the shared blocks from our write offset until the
> > > +		 * end of the extent.
> > > +		 */
> > > +		whichfork = XFS_COW_FORK;
> > > +		end_fsb = imap.br_startoff + imap.br_blockcount;
> > > +	} else {
> > > +		/*
> > > +		 * We cap the maximum length we map here to MAX_WRITEBACK_PAGES
> > > +		 * pages to keep the chunks of work done where somewhat
> > > +		 * symmetric with the work writeback does.  This is a completely
> > > +		 * arbitrary number pulled out of thin air.
> > > +		 *
> > > +		 * Note that the values needs to be less than 32-bits wide until
> > > +		 * the lower level functions are updated.
> > > +		 */
> > > +		count = min_t(loff_t, count, 1024 * PAGE_SIZE);
> > > +		end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb);
> > >  	}
> > >  
> > >  	error = xfs_qm_dqattach_locked(ip, false);
> > >  	if (error)
> > >  		goto out_unlock;
> > >  
> > > -	/*
> > > -	 * We cap the maximum length we map here to MAX_WRITEBACK_PAGES pages
> > > -	 * to keep the chunks of work done where somewhat symmetric with the
> > > -	 * work writeback does. This is a completely arbitrary number pulled
> > > -	 * out of thin air as a best guess for initial testing.
> > > -	 *
> > > -	 * Note that the values needs to be less than 32-bits wide until
> > > -	 * the lower level functions are updated.
> > > -	 */
> > > -	count = min_t(loff_t, count, 1024 * PAGE_SIZE);
> > > -	end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb);
> > > -
> > > -	if (eof) {
> > > +	if (eof && whichfork == XFS_DATA_FORK) {
> > >  		prealloc_blocks = xfs_iomap_prealloc_size(ip, offset, count,
> > >  				&icur);
> > >  		if (prealloc_blocks) {
> > > @@ -635,9 +677,11 @@ xfs_file_iomap_begin_delay(
> > >  	}
> > >  
> > >  retry:
> > > -	error = xfs_bmapi_reserve_delalloc(ip, XFS_DATA_FORK, offset_fsb,
> > > -			end_fsb - offset_fsb, prealloc_blocks, &got, &icur,
> > > -			eof);
> > > +	error = xfs_bmapi_reserve_delalloc(ip, whichfork, offset_fsb,
> > > +			end_fsb - offset_fsb, prealloc_blocks,
> > > +			whichfork == XFS_DATA_FORK ? &imap : &cmap,
> > > +			whichfork == XFS_DATA_FORK ? &icur : &ccur,
> > > +			whichfork == XFS_DATA_FORK ? eof : cow_eof);
> > >  	switch (error) {
> > >  	case 0:
> > >  		break;
> > > @@ -659,9 +703,19 @@ xfs_file_iomap_begin_delay(
> > >  	 * them out if the write happens to fail.
> > >  	 */
> > >  	iomap->flags |= IOMAP_F_NEW;
> > > -	trace_xfs_iomap_alloc(ip, offset, count, XFS_DATA_FORK, &got);
> > > +	trace_xfs_iomap_alloc(ip, offset, count, whichfork, &imap);
> > 
> > I'm confused by this, if whichfork == COW then won't ftrace report
> > results from the wrong fork?
> 
> The question is what the "right" fork to trace is as both matter here.
> At least we now clearly tell you which fork the trace belongs too.

True.  But I think it's misleading to have a tracepoint report say "cow"
when the extent data it records is actually from the data fork, and
particularly because we sometimes pass cmap back to the caller when
whichfork == COW.

If I were tracing through here I'd probably want to know what we found
from both forks at that particular point in time so that I could
continue following the logic.

--D



[Index of Archives]     [XFS Filesystem Development (older mail)]     [Linux Filesystem Development]     [Linux Audio Users]     [Yosemite Trails]     [Linux Kernel]     [Linux RAID]     [Linux SCSI]


  Powered by Linux