From: Darrick J. Wong <djwong@xxxxxxxxxx>

There's no point in continuing a stress test of online fsck if the
filesystem goes down. We can't query that kind of state directly, so as
a proxy we try to stat the mountpoint and interpret any error return as
a sign that the fs is down.

Signed-off-by: Darrick J. Wong <djwong@xxxxxxxxxx>
---
 common/fuzzy | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/common/fuzzy b/common/fuzzy
index 6519d5c1e2..f1bc2dc756 100644
--- a/common/fuzzy
+++ b/common/fuzzy
@@ -338,10 +338,17 @@ __stress_scrub_filter_output() {
 		-e '/No space left on device/d'
 }
 
+# Decide if the scratch filesystem is still alive.
+__stress_scrub_scratch_alive() {
+	# If we can't stat the scratch filesystem, there's a reasonably good
+	# chance that the fs shut down, which is not good.
+	stat "$SCRATCH_MNT" &>/dev/null
+}
+
 # Decide if we want to keep running stress tests. The first argument is the
 # stop time, and second argument is the path to the sentinel file.
 __stress_scrub_running() {
-	test -e "$2" && test "$(date +%s)" -lt "$1"
+	test -e "$2" && test "$(date +%s)" -lt "$1" && __stress_scrub_scratch_alive
 }
 
 # Run fs freeze and thaw in a tight loop.
@@ -486,6 +493,10 @@ _scratch_xfs_stress_scrub() {
 	done
 	_scratch_xfs_stress_scrub_cleanup
 
+	# Warn the user if we think the scratch filesystem went down.
+	__stress_scrub_scratch_alive || \
+		echo "Did the scratch filesystem die?"
+
 	echo "Loop finished at $(date)" >> $seqres.full
 }
 