On Thu, Jun 04, 2020 at 05:46:02PM +1000, Dave Chinner wrote: > From: Dave Chinner <dchinner@xxxxxxxxxx> > > Now we have a cached buffer on inode log items, we don't need > to do buffer lookups when flushing inodes anymore - all we need > to do is lock the buffer and we are ready to go. > > This largely gets rid of the need for xfs_iflush(), which is > essentially just a mechanism to look up the buffer and flush the > inode to it. Instead, we can just call xfs_iflush_cluster() with a > few modifications to ensure it also flushes the inode we already > hold locked. > > This allows the AIL inode item pushing to be almost entirely > non-blocking in XFS - we won't block unless memory allocation > for the cluster inode lookup blocks or the block device queues are > full. > > Writeback during inode reclaim becomes a little more complex because > we now have to lock the buffer ourselves, but otherwise this change > is largely a functional no-op that removes a whole lot of code. > > Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx> > Reviewed-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx> > --- Looks mostly reasonable.. > fs/xfs/xfs_inode.c | 106 ++++++---------------------------------- > fs/xfs/xfs_inode.h | 2 +- > fs/xfs/xfs_inode_item.c | 54 +++++++++----------- > 3 files changed, 37 insertions(+), 125 deletions(-) > > diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c > index af65acd24ec4e..61c872e4ee157 100644 > --- a/fs/xfs/xfs_inode.c > +++ b/fs/xfs/xfs_inode.c ... > @@ -3688,6 +3609,7 @@ xfs_iflush_int( > ASSERT(ip->i_df.if_format != XFS_DINODE_FMT_BTREE || > ip->i_df.if_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)); > ASSERT(iip != NULL && iip->ili_fields != 0); > + ASSERT(iip->ili_item.li_buf == bp); FWIW, the previous assert includes an iip NULL check. > > dip = xfs_buf_offset(bp, ip->i_imap.im_boffset); > ... 
> diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c > index 697248b7eb2be..326547e89cb6b 100644 > --- a/fs/xfs/xfs_inode_item.c > +++ b/fs/xfs/xfs_inode_item.c > @@ -485,53 +485,42 @@ xfs_inode_item_push( > uint rval = XFS_ITEM_SUCCESS; > int error; > > - if (xfs_ipincount(ip) > 0) > + ASSERT(iip->ili_item.li_buf); > + > + if (xfs_ipincount(ip) > 0 || xfs_buf_ispinned(bp) || > + (ip->i_flags & XFS_ISTALE)) > return XFS_ITEM_PINNED; > > - if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) > - return XFS_ITEM_LOCKED; > + /* If the inode is already flush locked, we're already flushing. */ Or we're racing with reclaim (since we don't have the ilock here any longer)? > + if (xfs_isiflocked(ip)) > + return XFS_ITEM_FLUSHING; > > - /* > - * Re-check the pincount now that we stabilized the value by > - * taking the ilock. > - */ > - if (xfs_ipincount(ip) > 0) { > - rval = XFS_ITEM_PINNED; > - goto out_unlock; > - } > + if (!xfs_buf_trylock(bp)) > + return XFS_ITEM_LOCKED; > > - /* > - * Stale inode items should force out the iclog. > - */ > - if (ip->i_flags & XFS_ISTALE) { > - rval = XFS_ITEM_PINNED; > - goto out_unlock; > + if (bp->b_flags & _XBF_DELWRI_Q) { > + xfs_buf_unlock(bp); > + return XFS_ITEM_FLUSHING; Hmm, what's the purpose of this check? I would expect that we'd still be able to flush to a buffer even though it's delwri queued. We drop the buffer lock after queueing it (and then it's reacquired on delwri submit). > } > + spin_unlock(&lip->li_ailp->ail_lock); > > /* > - * Someone else is already flushing the inode. Nothing we can do > - * here but wait for the flush to finish and remove the item from > - * the AIL. > + * We need to hold a reference for flushing the cluster buffer as it may > + * fail the buffer without IO submission. In which case, we better get a > + * reference for that completion because otherwise we don't get a > + * reference for IO until we queue the buffer for delwri submission. 
> */ > - if (!xfs_iflock_nowait(ip)) { > - rval = XFS_ITEM_FLUSHING; > - goto out_unlock; > - } > - > - ASSERT(iip->ili_fields != 0 || XFS_FORCED_SHUTDOWN(ip->i_mount)); > - spin_unlock(&lip->li_ailp->ail_lock); > - > - error = xfs_iflush(ip, &bp); > + xfs_buf_hold(bp); > + error = xfs_iflush_cluster(ip, bp); > if (!error) { > if (!xfs_buf_delwri_queue(bp, buffer_list)) > rval = XFS_ITEM_FLUSHING; > xfs_buf_relse(bp); > - } else if (error == -EAGAIN) > + } else { > rval = XFS_ITEM_LOCKED; > + } > > spin_lock(&lip->li_ailp->ail_lock); > -out_unlock: > - xfs_iunlock(ip, XFS_ILOCK_SHARED); > return rval; > } > > @@ -548,6 +537,7 @@ xfs_inode_item_release( > > ASSERT(ip->i_itemp != NULL); > ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); > + ASSERT(lip->li_buf || !test_bit(XFS_LI_DIRTY, &lip->li_flags)); This is the transaction cancel/abort path, so it seems like this should be part of the patch that attaches the ili on logging the inode? Brian > > lock_flags = iip->ili_lock_flags; > iip->ili_lock_flags = 0; > -- > 2.26.2.761.g0e0b3e54be >