From: Dave Chinner <dchinner@xxxxxxxxxx> Currently the report of a bio error from completion immediately marks the buffer with an error. The issue is that this is racy w.r.t. synchronous IO - the submitter can see b_error being set before the IO is complete, and hence we cannot differentiate between submission failures and completion failures. Add an internal b_io_error field protected by the b_lock to catch IO completion errors, and only propagate that to the buffer during final IO completion handling. Hence we can tell in xfs_buf_iorequest if we've had a submission failure bey checking bp->b_error before dropping our b_io_remaining reference - that reference will prevent b_io_error values from being propagated to b_error in the event that completion races with submission. Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx> --- fs/xfs/xfs_buf.c | 18 ++++++++++++++++-- fs/xfs/xfs_buf.h | 1 + 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 08f9fca..a8f4a39 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1008,6 +1008,13 @@ xfs_buf_ioend( bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD); + /* + * Pull in IO completion errors now. We are guaranteed to be running + * single threaded, so we don't need the lock to read b_io_error. + */ + if (!bp->b_error && bp->b_io_error) + xfs_buf_ioerror(bp, bp->b_io_error); + /* Only validate buffers that were read without errors */ if (read && !bp->b_error && bp->b_ops) { ASSERT(!bp->b_iodone); @@ -1192,8 +1199,12 @@ xfs_buf_bio_end_io( * don't overwrite existing errors - otherwise we can lose errors on * buffers that require multiple bios to complete. */ - if (!bp->b_error) - xfs_buf_ioerror(bp, error); + if (error) { + spin_lock(&bp->b_lock); + if (!bp->b_io_error) + bp->b_io_error = error; + spin_unlock(&bp->b_lock); + } if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); @@ -1379,6 +1390,9 @@ xfs_buf_iorequest( if (bp->b_flags & XBF_WRITE) xfs_buf_wait_unpin(bp); + /* clear the internal error state to avoid spurious errors */ + bp->b_io_error = 0; + /* * Take references to the buffer. For XBF_ASYNC buffers, holding a * reference for as long as submission takes is all that is necessary diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 4585c15..44db8cd 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -158,6 +158,7 @@ typedef struct xfs_buf { struct list_head b_lru; /* lru list */ spinlock_t b_lock; /* internal state lock */ unsigned int b_state; /* internal state flags */ + int b_io_error; /* internal IO error state */ wait_queue_head_t b_waiters; /* unpin waiters */ struct list_head b_list; struct xfs_perag *b_pag; /* contains rbtree root */ -- 2.0.0 _______________________________________________ xfs mailing list xfs@xxxxxxxxxxx http://oss.sgi.com/mailman/listinfo/xfs