Re: [PATCH 03/12] xfs: always attach iflush_done and simplify error handling

Brian Foster <bfoster@xxxxxxxxxx> · Mon, 20 Apr 2020 09:59:10 -0400

On Fri, Apr 17, 2020 at 05:07:16PM -0700, Allison Collins wrote:
> On 4/17/20 8:08 AM, Brian Foster wrote:
> > The inode flush code has several layers of error handling between
> > the inode and cluster flushing code. If the inode flush fails before
> > acquiring the backing buffer, the inode flush is aborted. If the
> > cluster flush fails, the current inode flush is aborted and the
> > cluster buffer is failed to handle the initial inode and any others
> > that might have been attached before the error.
> > 
> > Since xfs_iflush() is the only caller of xfs_iflush_cluser(), the
> > error handling between the two can be condensed in the top-level
> > function. If we update xfs_iflush_int() to attach the item
> > completion handler to the buffer first, any errors that occur after
> > the first call to xfs_iflush_int() can be handled with a buffer
> > I/O failure.
> > 
> > Lift the error handling from xfs_iflush_cluster() into xfs_iflush()
> > and consolidate with the existing error handling. This also replaces
> > the need to release the buffer because failing the buffer with
> > XBF_ASYNC drops the current reference.
> > 
> > Signed-off-by: Brian Foster <bfoster@xxxxxxxxxx>
> > ---
> >   fs/xfs/xfs_inode.c | 94 +++++++++++++++-------------------------------
> >   1 file changed, 30 insertions(+), 64 deletions(-)
> > 
> > diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> > index b539ee221ce5..4c9971ec6fa6 100644
> > --- a/fs/xfs/xfs_inode.c
> > +++ b/fs/xfs/xfs_inode.c
> > @@ -3496,6 +3496,7 @@ xfs_iflush_cluster(
> >   	struct xfs_inode	**cilist;
> >   	struct xfs_inode	*cip;
> >   	struct xfs_ino_geometry	*igeo = M_IGEO(mp);
> > +	int			error = 0;
> >   	int			nr_found;
> >   	int			clcount = 0;
> >   	int			i;
> > @@ -3588,11 +3589,10 @@ xfs_iflush_cluster(
> >   		 * re-check that it's dirty before flushing.
> >   		 */
> >   		if (!xfs_inode_clean(cip)) {
> > -			int	error;
> >   			error = xfs_iflush_int(cip, bp);
> >   			if (error) {
> >   				xfs_iunlock(cip, XFS_ILOCK_SHARED);
> > -				goto cluster_corrupt_out;
> > +				goto out_free;
> >   			}
> >   			clcount++;
> >   		} else {
> > @@ -3611,32 +3611,7 @@ xfs_iflush_cluster(
> >   	kmem_free(cilist);
> >   out_put:
> >   	xfs_perag_put(pag);
> > -	return 0;
> > -
> > -
> > -cluster_corrupt_out:
> > -	/*
> > -	 * Corruption detected in the clustering loop.  Invalidate the
> > -	 * inode buffer and shut down the filesystem.
> > -	 */
> > -	rcu_read_unlock();
> Hmm, I can't find where this unlock moved?  Is it not needed anymore?  I
> think I followed the rest of it though.
> 

It's not needed because the whole cluster_corrupt_out path goes away.
out_free is used instead, which also calls rcu_read_unlock() and returns
the error to the caller..

> Reviewed-by: Allison Collins <allison.henderson@xxxxxxxxxx>
> 

Thanks!

Brian

> Allison
> 
> > -
> > -	/*
> > -	 * We'll always have an inode attached to the buffer for completion
> > -	 * process by the time we are called from xfs_iflush(). Hence we have
> > -	 * always need to do IO completion processing to abort the inodes
> > -	 * attached to the buffer.  handle them just like the shutdown case in
> > -	 * xfs_buf_submit().
> > -	 */
> > -	ASSERT(bp->b_iodone);
> > -	xfs_buf_iofail(bp, XBF_ASYNC);
> > -	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
> > -
> > -	/* abort the corrupt inode, as it was not attached to the buffer */
> > -	xfs_iflush_abort(cip, false);
> > -	kmem_free(cilist);
> > -	xfs_perag_put(pag);
> > -	return -EFSCORRUPTED;
> > +	return error;
> >   }
> >   /*
> > @@ -3692,17 +3667,16 @@ xfs_iflush(
> >   	 */
> >   	if (XFS_FORCED_SHUTDOWN(mp)) {
> >   		error = -EIO;
> > -		goto abort_out;
> > +		goto abort;
> >   	}
> >   	/*
> >   	 * Get the buffer containing the on-disk inode. We are doing a try-lock
> > -	 * operation here, so we may get  an EAGAIN error. In that case, we
> > -	 * simply want to return with the inode still dirty.
> > +	 * operation here, so we may get an EAGAIN error. In that case, return
> > +	 * leaving the inode dirty.
> >   	 *
> >   	 * If we get any other error, we effectively have a corruption situation
> > -	 * and we cannot flush the inode, so we treat it the same as failing
> > -	 * xfs_iflush_int().
> > +	 * and we cannot flush the inode. Abort the flush and shut down.
> >   	 */
> >   	error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &bp, XBF_TRYLOCK,
> >   			       0);
> > @@ -3711,14 +3685,7 @@ xfs_iflush(
> >   		return error;
> >   	}
> >   	if (error)
> > -		goto corrupt_out;
> > -
> > -	/*
> > -	 * First flush out the inode that xfs_iflush was called with.
> > -	 */
> > -	error = xfs_iflush_int(ip, bp);
> > -	if (error)
> > -		goto corrupt_out;
> > +		goto abort;
> >   	/*
> >   	 * If the buffer is pinned then push on the log now so we won't
> > @@ -3728,28 +3695,28 @@ xfs_iflush(
> >   		xfs_log_force(mp, 0);
> >   	/*
> > -	 * inode clustering: try to gather other inodes into this write
> > +	 * Flush the provided inode then attempt to gather others from the
> > +	 * cluster into the write.
> >   	 *
> > -	 * Note: Any error during clustering will result in the filesystem
> > -	 * being shut down and completion callbacks run on the cluster buffer.
> > -	 * As we have already flushed and attached this inode to the buffer,
> > -	 * it has already been aborted and released by xfs_iflush_cluster() and
> > -	 * so we have no further error handling to do here.
> > +	 * Note: Once we attempt to flush an inode, we must run buffer
> > +	 * completion callbacks on any failure. If this fails, simulate an I/O
> > +	 * failure on the buffer and shut down.
> >   	 */
> > -	error = xfs_iflush_cluster(ip, bp);
> > -	if (error)
> > -		return error;
> > +	error = xfs_iflush_int(ip, bp);
> > +	if (!error)
> > +		error = xfs_iflush_cluster(ip, bp);
> > +	if (error) {
> > +		xfs_buf_iofail(bp, XBF_ASYNC);
> > +		goto shutdown;
> > +	}
> >   	*bpp = bp;
> >   	return 0;
> > -corrupt_out:
> > -	if (bp)
> > -		xfs_buf_relse(bp);
> > -	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
> > -abort_out:
> > -	/* abort the corrupt inode, as it was not attached to the buffer */
> > +abort:
> >   	xfs_iflush_abort(ip, false);
> > +shutdown:
> > +	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
> >   	return error;
> >   }
> > @@ -3798,6 +3765,13 @@ xfs_iflush_int(
> >   	       ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
> >   	ASSERT(iip != NULL && iip->ili_fields != 0);
> > +	/*
> > +	 * Attach the inode item callback to the buffer. Whether the flush
> > +	 * succeeds or not, buffer I/O completion processing is now required to
> > +	 * remove the inode from the AIL and release the flush lock.
> > +	 */
> > +	xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);
> > +
> >   	/* set *dip = inode's place in the buffer */
> >   	dip = xfs_buf_offset(bp, ip->i_imap.im_boffset);
> > @@ -3913,14 +3887,6 @@ xfs_iflush_int(
> >   	xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
> >   				&iip->ili_item.li_lsn);
> > -	/*
> > -	 * Attach the function xfs_iflush_done to the inode's
> > -	 * buffer.  This will remove the inode from the AIL
> > -	 * and unlock the inode's flush lock when the inode is
> > -	 * completely written to disk.
> > -	 */
> > -	xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);
> > -
> >   	/* generate the checksum. */
> >   	xfs_dinode_calc_crc(mp, dip);
> > 
>