On Tue, 2024-12-31 at 15:56 -0800, Darrick J. Wong wrote: > From: Darrick J. Wong <djwong@xxxxxxxxxx> > > Capture the kernel output after a mount failure occurs. If the test > itself fails, then keep the logging output for further diagnosis. > > Signed-off-by: "Darrick J. Wong" <djwong@xxxxxxxxxx> > --- > check | 22 +++++++++++++++++++++- > common/rc | 26 +++++++++++++++++++++++++- > common/report | 8 ++++++++ > tests/selftest/008 | 20 ++++++++++++++++++++ > tests/selftest/008.out | 1 + > 5 files changed, 75 insertions(+), 2 deletions(-) > create mode 100755 tests/selftest/008 > create mode 100644 tests/selftest/008.out > > > diff --git a/check b/check > index 9222cd7e4f8197..a46ea1a54d78bb 100755 > --- a/check > +++ b/check > @@ -614,7 +614,7 @@ _stash_fail_loop_files() { > local seq_prefix="${REPORT_DIR}/${1}" > local cp_suffix="$2" > > - for i in ".full" ".dmesg" ".out.bad" ".notrun" ".core" > ".hints"; do > + for i in ".full" ".dmesg" ".out.bad" ".notrun" ".core" ".hints" > ".mountfail"; do > rm -f "${seq_prefix}${i}${cp_suffix}" > if [ -f "${seq_prefix}${i}" ]; then > cp "${seq_prefix}${i}" > "${seq_prefix}${i}${cp_suffix}" > @@ -994,6 +994,7 @@ function run_section() > echo -n " $seqnum -- " > cat $seqres.notrun > tc_status="notrun" > + rm -f "$seqres.mountfail?" > _stash_test_status "$seqnum" "$tc_status" > > # Unmount the scratch fs so that we can wipe > the scratch > @@ -1053,6 +1054,7 @@ function run_section() > if [ ! -f $seq.out ]; then > _dump_err "no qualified output" > tc_status="fail" > + rm -f "$seqres.mountfail?" > _stash_test_status "$seqnum" "$tc_status" > continue; > fi > @@ -1089,6 +1091,24 @@ function run_section() > rm -f $seqres.hints > fi > fi > + > + if [ -f "$seqres.mountfail?" ]; then > + if [ "$tc_status" = "fail" ]; then > + # Let the user know if there were mount > + # failures on a test that failed > because that > + # could be interesting. > + mv "$seqres.mountfail?" 
> "$seqres.mountfail" > + _dump_err "check: possible mount > failures (see $seqres.mountfail)" > + test -f $seqres.mountfail && \ > + maybe_compress_logfile > $seqres.mountfail $MAX_MOUNTFAIL_SIZE > + else > + # Don't retain mount failure logs for > tests > + # that pass or were skipped because > some tests > + # intentionally drive mount failures. > + rm -f "$seqres.mountfail?" > + fi > + fi > + > _stash_test_status "$seqnum" "$tc_status" > done > > diff --git a/common/rc b/common/rc > index d7dfb55bbbd7e1..0ede68eb912440 100644 > --- a/common/rc > +++ b/common/rc > @@ -204,9 +204,33 @@ _get_hugepagesize() > awk '/Hugepagesize/ {print $2 * 1024}' /proc/meminfo > } > > +# Does dmesg have a --since flag? > +_dmesg_detect_since() > +{ > + if [ -z "$DMESG_HAS_SINCE" ]; then > + test "$DMESG_HAS_SINCE" = "yes" > + return > + elif dmesg --help | grep -q -- --since; then > + DMESG_HAS_SINCE=yes > + else > + DMESG_HAS_SINCE=no > + fi > +} > + > _mount() > { > - $MOUNT_PROG $* > + $MOUNT_PROG $* > + ret=$? > + if [ "$ret" -ne 0 ]; then > + echo "\"$MOUNT_PROG $*\" failed at $(date)" >> > "$seqres.mountfail?" > + if _dmesg_detect_since; then > + dmesg --since '30s ago' >> "$seqres.mountfail?" > + else > + dmesg | tail -n 100 >> "$seqres.mountfail?"

Is it possible to grep for a mount failure message in dmesg and then capture the last n lines? Do you think that would be more accurate? Also, do you think it would be useful to make this 100 configurable instead of hardcoding it?
> + fi > + fi > + > + return $ret > } > > # Call _mount to do mount operation but also save mountpoint to > diff --git a/common/report b/common/report > index 0e91e481f9725a..b57697f76dafb2 100644 > --- a/common/report > +++ b/common/report > @@ -199,6 +199,7 @@ _xunit_make_testcase_report() > local out_src="${SRC_DIR}/${test_name}.out" > local full_file="${REPORT_DIR}/${test_name}.full" > local dmesg_file="${REPORT_DIR}/${test_name}.dmesg" > + local > mountfail_file="${REPORT_DIR}/${test_name}.mountfail" > local outbad_file="${REPORT_DIR}/${test_name}.out.bad" > if [ -z "$_err_msg" ]; then > _err_msg="Test $test_name failed, reason > unknown" > @@ -225,6 +226,13 @@ _xunit_make_testcase_report() > printf ']]>\n' >>$report > echo -e "\t\t</system-err>" >> $report > fi > + if [ -z "$quiet" -a -f "$mountfail_file" ]; then > + echo -e "\t\t<mount-failure>" >> $report > + printf '<![CDATA[\n' >>$report > + cat "$mountfail_file" | tr -dc > '[:print:][:space:]' | encode_cdata >>$report > + printf ']]>\n' >>$report > + echo -e "\t\t</mount-failure>" >> $report > + fi > ;; > *) > echo -e "\t\t<failure message=\"Unknown > test_status=$test_status\" type=\"TestFail\"/>" >> $report > diff --git a/tests/selftest/008 b/tests/selftest/008 > new file mode 100755 > index 00000000000000..db80ffe6f77339 > --- /dev/null > +++ b/tests/selftest/008 > @@ -0,0 +1,20 @@ > +#! /bin/bash > +# SPDX-License-Identifier: GPL-2.0 > +# Copyright (c) 2024-2025 Oracle. All Rights Reserved. > +# > +# FS QA Test 008 > +# > +# Test mount failure capture. > +# > +. ./common/preamble > +_begin_fstest selftest > + > +_require_command "$WIPEFS_PROG" wipefs > +_require_scratch > + > +$WIPEFS_PROG -a $SCRATCH_DEV > +_scratch_mount &>> $seqres.full

Minor: Do you think adding some filtered messages from the captured dmesg logs to the output would be helpful?
> + > +# success, all done > +status=0 > +exit > diff --git a/tests/selftest/008.out b/tests/selftest/008.out > new file mode 100644 > index 00000000000000..aaff95f3f48372 > --- /dev/null > +++ b/tests/selftest/008.out > @@ -0,0 +1 @@ > +QA output created by 008 >