---
fs/xfs/xfs_inode.c | 117 +++++++++++++++++----------------------------
1 file changed, 45 insertions(+), 72 deletions(-)
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 909ca7c0bac4..84f2ee9957dc 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -3496,6 +3496,7 @@ xfs_iflush_cluster(
struct xfs_inode **cilist;
struct xfs_inode *cip;
struct xfs_ino_geometry *igeo = M_IGEO(mp);
+ int error = 0;
int nr_found;
int clcount = 0;
int i;
@@ -3588,11 +3589,10 @@ xfs_iflush_cluster(
* re-check that it's dirty before flushing.
*/
if (!xfs_inode_clean(cip)) {
- int error;
error = xfs_iflush_int(cip, bp);
if (error) {
xfs_iunlock(cip, XFS_ILOCK_SHARED);
- goto cluster_corrupt_out;
+ goto out_free;
}
clcount++;
} else {
@@ -3611,33 +3611,7 @@ xfs_iflush_cluster(
kmem_free(cilist);
out_put:
xfs_perag_put(pag);
- return 0;
-
-
-cluster_corrupt_out:
- /*
- * Corruption detected in the clustering loop. Invalidate the
- * inode buffer and shut down the filesystem.
- */
- rcu_read_unlock();
-
- /*
- * We'll always have an inode attached to the buffer for completion
- * process by the time we are called from xfs_iflush(). Hence we have
- * always need to do IO completion processing to abort the inodes
- * attached to the buffer. handle them just like the shutdown case in
- * xfs_buf_submit().
- */
- ASSERT(bp->b_iodone);
- bp->b_flags |= XBF_ASYNC;
- xfs_buf_ioend_fail(bp);
- xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
-
- /* abort the corrupt inode, as it was not attached to the buffer */
- xfs_iflush_abort(cip, false);
- kmem_free(cilist);
- xfs_perag_put(pag);
- return -EFSCORRUPTED;
+ return error;
}
/*
@@ -3693,17 +3667,16 @@ xfs_iflush(
*/
if (XFS_FORCED_SHUTDOWN(mp)) {
error = -EIO;
- goto abort_out;
+ goto abort;
}
/*
* Get the buffer containing the on-disk inode. We are doing a try-lock
- * operation here, so we may get an EAGAIN error. In that case, we
- * simply want to return with the inode still dirty.
+ * operation here, so we may get an EAGAIN error. In that case, return
+ * leaving the inode dirty.
*
* If we get any other error, we effectively have a corruption situation
- * and we cannot flush the inode, so we treat it the same as failing
- * xfs_iflush_int().
+ * and we cannot flush the inode. Abort the flush and shut down.
*/
error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &bp, XBF_TRYLOCK,
0);
@@ -3712,14 +3685,7 @@ xfs_iflush(
return error;
}
if (error)
- goto corrupt_out;
-
- /*
- * First flush out the inode that xfs_iflush was called with.
- */
- error = xfs_iflush_int(ip, bp);
- if (error)
- goto corrupt_out;
+ goto abort;
/*
* If the buffer is pinned then push on the log now so we won't
@@ -3729,28 +3695,29 @@ xfs_iflush(
xfs_log_force(mp, 0);
/*
- * inode clustering: try to gather other inodes into this write
+ * Flush the provided inode then attempt to gather others from the
+ * cluster into the write.
*
- * Note: Any error during clustering will result in the filesystem
- * being shut down and completion callbacks run on the cluster buffer.
- * As we have already flushed and attached this inode to the buffer,
- * it has already been aborted and released by xfs_iflush_cluster() and
- * so we have no further error handling to do here.
+ * Note: Once we attempt to flush an inode, we must run buffer
+ * completion callbacks on any failure. If this fails, simulate an I/O
+ * failure on the buffer and shut down.
*/
- error = xfs_iflush_cluster(ip, bp);
- if (error)
- return error;
+ error = xfs_iflush_int(ip, bp);
+ if (!error)
+ error = xfs_iflush_cluster(ip, bp);
+ if (error) {
+ bp->b_flags |= XBF_ASYNC;
+ xfs_buf_ioend_fail(bp);
+ goto shutdown;
+ }
*bpp = bp;
return 0;
-corrupt_out:
- if (bp)
- xfs_buf_relse(bp);
- xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
-abort_out:
- /* abort the corrupt inode, as it was not attached to the buffer */
+abort:
xfs_iflush_abort(ip, false);
+shutdown:
+ xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
return error;
}
@@ -3792,6 +3759,7 @@ xfs_iflush_int(
struct xfs_inode_log_item *iip = ip->i_itemp;
struct xfs_dinode *dip;
struct xfs_mount *mp = ip->i_mount;
+ int error;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
ASSERT(xfs_isiflocked(ip));
@@ -3799,15 +3767,21 @@ xfs_iflush_int(
ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
ASSERT(iip != NULL && iip->ili_fields != 0);
- /* set *dip = inode's place in the buffer */
dip = xfs_buf_offset(bp, ip->i_imap.im_boffset);
+ /*
+ * We don't flush the inode if any of the following checks fail, but we
+ * do still update the log item and attach to the backing buffer as if
+ * the flush happened. This is a formality to facilitate predictable
+ * error handling as the caller will shutdown and fail the buffer.
+ */
+ error = -EFSCORRUPTED;
if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
mp, XFS_ERRTAG_IFLUSH_1)) {
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
"%s: Bad inode %Lu magic number 0x%x, ptr "PTR_FMT,
__func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip);
- goto corrupt_out;
+ goto flush_out;
}
if (S_ISREG(VFS_I(ip)->i_mode)) {
if (XFS_TEST_ERROR(
@@ -3817,7 +3791,7 @@ xfs_iflush_int(
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
"%s: Bad regular inode %Lu, ptr "PTR_FMT,
__func__, ip->i_ino, ip);
- goto corrupt_out;
+ goto flush_out;
}
} else if (S_ISDIR(VFS_I(ip)->i_mode)) {
if (XFS_TEST_ERROR(
@@ -3828,7 +3802,7 @@ xfs_iflush_int(
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
"%s: Bad directory inode %Lu, ptr "PTR_FMT,
__func__, ip->i_ino, ip);
- goto corrupt_out;
+ goto flush_out;
}
}
if (XFS_TEST_ERROR(ip->i_d.di_nextents + ip->i_d.di_anextents >
@@ -3839,14 +3813,14 @@ xfs_iflush_int(
__func__, ip->i_ino,
ip->i_d.di_nextents + ip->i_d.di_anextents,
ip->i_d.di_nblocks, ip);
- goto corrupt_out;
+ goto flush_out;
}
if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize,
mp, XFS_ERRTAG_IFLUSH_6)) {
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
"%s: bad inode %Lu, forkoff 0x%x, ptr "PTR_FMT,
__func__, ip->i_ino, ip->i_d.di_forkoff, ip);
- goto corrupt_out;
+ goto flush_out;
}
/*
@@ -3863,7 +3837,7 @@ xfs_iflush_int(
/* Check the inline fork data before we write out. */
if (!xfs_inode_verify_forks(ip))
- goto corrupt_out;
+ goto flush_out;
/*
* Copy the dirty parts of the inode into the on-disk inode. We always
@@ -3906,6 +3880,8 @@ xfs_iflush_int(
* need the AIL lock, because it is a 64 bit value that cannot be read
* atomically.
*/
+ error = 0;
+flush_out:
iip->ili_last_fields = iip->ili_fields;
iip->ili_fields = 0;
iip->ili_fsync_fields = 0;
@@ -3915,10 +3891,10 @@ xfs_iflush_int(
&iip->ili_item.li_lsn);
/*
- * Attach the function xfs_iflush_done to the inode's
- * buffer. This will remove the inode from the AIL
- * and unlock the inode's flush lock when the inode is
- * completely written to disk.
+ * Attach the inode item callback to the buffer whether the flush
+ * succeeded or not. If not, the caller will shut down and fail I/O
+ * completion on the buffer to remove the inode from the AIL and release
+ * the flush lock.
*/
xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);
@@ -3927,10 +3903,7 @@ xfs_iflush_int(
ASSERT(!list_empty(&bp->b_li_list));
ASSERT(bp->b_iodone != NULL);
- return 0;
-
-corrupt_out:
- return -EFSCORRUPTED;
+ return error;
}
/* Release an inode. */