From: Darrick J. Wong <djwong@xxxxxxxxxx>

After we've run xfs_scrub -n to perform a check of a mounted
filesystem's metadata, we should check the health reporting system to
make sure that the results got recorded. Also wire this up to the xfs
fuzz testing helpers.

Signed-off-by: Darrick J. Wong <djwong@xxxxxxxxxx>
---
 common/fuzzy | 27 +++++++++++++++++++++++++++
 common/xfs | 43 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 70 insertions(+)

diff --git a/common/fuzzy b/common/fuzzy
index cf085f8b28..d841d435eb 100644
--- a/common/fuzzy
+++ b/common/fuzzy
@@ -216,6 +216,15 @@ __scratch_xfs_fuzz_field_online() {
 	test $res -eq 0 && \
 		(>&2 echo "${fuzz_action}: online scrub didn't fail.")

+	# Does the health status report reflect the corruption?
+	if [ $res -ne 0 ]; then
+		__fuzz_notify "++ Detect fuzzed field ill-health report"
+		_check_xfs_health $SCRATCH_MNT 2>&1
+		res=$?
+		test $res -ne 1 && \
+			(>&2 echo "${fuzz_action}: online health check failed ($res).")
+	fi
+
 	# Try fixing the filesystem online
 	__fuzz_notify "++ Try to repair filesystem (online)"
 	_scratch_scrub 2>&1
@@ -308,6 +317,15 @@ __scratch_xfs_fuzz_field_norepair() {
 	test $res -eq 0 && \
 		(>&2 echo "${fuzz_action}: online scrub didn't fail.")

+	# Does the health status report reflect the corruption?
+	if [ $res -ne 0 ]; then
+		__fuzz_notify "++ Detect fuzzed field ill-health report"
+		_check_xfs_health $SCRATCH_MNT 2>&1
+		res=$?
+		test $res -ne 1 && \
+			(>&2 echo "${fuzz_action}: online health check failed ($res).")
+	fi
+
 	__scratch_xfs_fuzz_unmount

 	return 0
@@ -338,6 +356,15 @@ __scratch_xfs_fuzz_field_both() {
 	test $res -eq 0 && \
 		(>&2 echo "${fuzz_action}: online scrub didn't fail.")

+	# Does the health status report reflect the corruption?
+	if [ $res -ne 0 ]; then
+		__fuzz_notify "++ Detect fuzzed field ill-health report"
+		_check_xfs_health $SCRATCH_MNT 2>&1
+		res=$?
+		test $res -ne 1 && \
+			(>&2 echo "${fuzz_action}: online health check failed ($res).")
+	fi
+
 	# Try fixing the filesystem online
 	__fuzz_notify "++ Try to repair filesystem (online)"
 	_scratch_scrub 2>&1
diff --git a/common/xfs b/common/xfs
index 804047557b..371618dc7b 100644
--- a/common/xfs
+++ b/common/xfs
@@ -599,6 +599,37 @@ _require_xfs_db_command()
 		_notrun "xfs_db $command support is missing"
 }

+# Check the health of a mounted XFS filesystem. Callers probably want to
+# ensure that xfs_scrub has been run first. Returns 1 if unhealthy metadata
+# are found or 0 otherwise.
+_check_xfs_health() {
+	local mntpt="$1"
+	local ret=0
+	local t="$tmp.health_helper"
+
+	test -x "$XFS_SPACEMAN_PROG" || return 0
+
+	$XFS_SPACEMAN_PROG -c 'health -c -q' $mntpt > $t.out 2> $t.err
+	test $? -ne 0 && ret=1
+
+	# Don't return error if userspace or kernel don't support health
+	# reporting.
+	grep -q 'command.*health.*not found' $t.err && return 0
+	grep -q 'Inappropriate ioctl for device' $t.err && return 0
+
+	# Filter out the "please run scrub" message if nothing's been checked.
+	sed -e '/Health status has not been/d' -e '/Please run xfs_scrub/d' -i \
+		$t.err
+
+	grep -q unhealthy $t.out && ret=1
+	test $(wc -l < $t.err) -gt 0 && ret=1
+	cat $t.out
+	cat $t.err 1>&2
+	rm -f $t.out $t.err
+
+	return $ret
+}
+
 # Does the filesystem mounted from a particular device support scrub?
 _supports_xfs_scrub()
 {
@@ -750,6 +781,18 @@ _check_xfs_filesystem()
 			ok=0
 		fi
 		rm -f $tmp.scrub
+
+		# Does the health reporting notice anything?
+		_check_xfs_health $mntpt > $tmp.health 2>&1
+		res=$?
+		if [ $((res ^ ok)) -eq 0 ]; then
+			_log_err "_check_xfs_filesystem: filesystem on $device failed health check"
+			echo "*** xfs_spaceman -c 'health -c -q' output ***" >> $seqres.full
+			cat $tmp.health >> $seqres.full
+			echo "*** end xfs_spaceman output" >> $seqres.full
+			ok=0
+		fi
+		rm -f $tmp.health
 	fi

 	if [ "$type" = "xfs" ]; then