From: Dave Chinner <dchinner@xxxxxxxxxx> If we take "retry forever" literally on metadata IO errors, we can hang at unmount, once it retries those writes forever. This is the default behavior, unfortunately. Currently, there is no way to move the unmount forward, once the mount point will be already detached, making a call to xfs_io shutdown impossible. To fix this, we need to mark the filesystem as being in the process of unmounting, so that a shutdown can be triggered in case of errors. It will be noisy, though, as it will log the errors and the shutdown that occurs. Do this with a mount flag that is added at the appropriate time (i.e. before the blocking AIL sync). We also need to add this flag if mount fails after the initial phase of log recovery has been run. Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx> Signed-off-by: Carlos Maiolino <cmaiolino@xxxxxxxxxx> --- fs/xfs/xfs_buf_item.c | 4 ++++ fs/xfs/xfs_mount.c | 9 +++++++++ fs/xfs/xfs_mount.h | 1 + 3 files changed, 14 insertions(+) diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 68ed2a0..3fd3389 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -1106,6 +1106,10 @@ xfs_buf_iodone_callback_error( cfg->retry_timeout + bp->b_first_retry_time)) goto permanent_error; + /* At unmount we may treat errors differently */ + if (mp->m_flags & XFS_MOUNT_UNMOUNTING) + goto permanent_error; + /* still a transient error, higher layers will retry */ xfs_buf_ioerror(bp, 0); xfs_buf_relse(bp); diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index eda3906..d89fba6 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -961,6 +961,7 @@ xfs_mountfs( cancel_delayed_work_sync(&mp->m_reclaim_work); xfs_reclaim_inodes(mp, SYNC_WAIT); out_log_dealloc: + mp->m_flags |= XFS_MOUNT_UNMOUNTING; xfs_log_mount_cancel(mp); out_fail_wait: if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) @@ -1012,6 +1013,14 @@ xfs_unmountfs( xfs_log_force(mp, XFS_LOG_SYNC); /* + * We now need to tell the world we are unmounting. This will allow + * us to detect that the filesystem is going away and we should error + * out anything that we have been retrying in the background. This will + * prevent neverending retries iin AIL pushing from hanging the unmount. + */ + mp->m_flags |= XFS_MOUNT_UNMOUNTING; + + /* * Flush all pending changes from the AIL. */ xfs_ail_push_all_sync(mp->m_ail); diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index e3b3267..54e26fc 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -193,6 +193,7 @@ typedef struct xfs_mount { #define XFS_MOUNT_WSYNC (1ULL << 0) /* for nfs - all metadata ops must be synchronous except for space allocations */ +#define XFS_MOUNT_UNMOUNTING (1ULL << 1) /* filesystem is unmounting */ #define XFS_MOUNT_WAS_CLEAN (1ULL << 3) #define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem operations, typically for -- 2.4.11 _______________________________________________ xfs mailing list xfs@xxxxxxxxxxx http://oss.sgi.com/mailman/listinfo/xfs