From: Darrick J. Wong <djwong@xxxxxxxxxx> Add a new test that races the GETFSMAP ioctl with ro/rw remounting to make sure we don't livelock on the empty transaction that fsmap uses to avoid deadlocking on rmap btree cycles. Signed-off-by: Darrick J. Wong <djwong@xxxxxxxxxx> --- common/fuzzy | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++++- ltp/fsstress.c | 18 +++++++++- tests/xfs/732 | 38 +++++++++++++++++++++ tests/xfs/732.out | 2 + 4 files changed, 153 insertions(+), 3 deletions(-) create mode 100755 tests/xfs/732 create mode 100644 tests/xfs/732.out diff --git a/common/fuzzy b/common/fuzzy index 58e299d34b..ee97aa4298 100644 --- a/common/fuzzy +++ b/common/fuzzy @@ -429,6 +429,7 @@ __stress_scrub_clean_scratch() { __stress_scrub_fsx_loop() { local end="$1" local runningfile="$2" + local remount_period="$3" local focus=(-q -X) # quiet, validate file contents # As of November 2022, 2 million fsx ops should be enough to keep @@ -440,6 +441,43 @@ __stress_scrub_fsx_loop() { local args="$FSX_AVOID ${focus[@]} ${SCRATCH_MNT}/fsx.$seq" echo "Running $here/ltp/fsx $args" >> $seqres.full + if [ -n "$remount_period" ]; then + local mode="rw" + local rw_arg="" + while __stress_scrub_running "$end" "$runningfile"; do + # Need to recheck running conditions if we cleared + # anything. + test "$mode" = "rw" && __stress_scrub_clean_scratch && continue + + timeout -s TERM "$remount_period" $here/ltp/fsx \ + $args $rw_arg >> $seqres.full + res=$? + echo "$mode fsx exits with $res at $(date)" >> $seqres.full + if [ "$res" -ne 0 ] && [ "$res" -ne 124 ]; then + # Stop if fsx returns error. Mask off + # the magic code 124 because that is how the + # timeout(1) program communicates that we ran + # out of time. + break; + fi + if [ "$mode" = "rw" ]; then + mode="ro" + rw_arg="-t 0 -w 0 -FHzCIJBE0" + else + mode="rw" + rw_arg="" + fi + + # Try remounting until we get the result we wanted + while ! 
_scratch_remount "$mode" &>/dev/null && \ + __stress_scrub_running "$end" "$runningfile"; do + sleep 0.2 + done + done + rm -f "$runningfile" + return 0 + fi + while __stress_scrub_running "$end" "$runningfile"; do # Need to recheck running conditions if we cleared anything __stress_scrub_clean_scratch && continue @@ -453,12 +491,50 @@ __stress_scrub_fsx_loop() { __stress_scrub_fsstress_loop() { local end="$1" local runningfile="$2" + local remount_period="$3" # As of March 2022, 2 million fsstress ops should be enough to keep # any filesystem busy for a couple of hours. local args=$(_scale_fsstress_args -p 4 -d $SCRATCH_MNT -n 2000000 $FSSTRESS_AVOID) echo "Running $FSSTRESS_PROG $args" >> $seqres.full + if [ -n "$remount_period" ]; then + local mode="rw" + local rw_arg="" + while __stress_scrub_running "$end" "$runningfile"; do + # Need to recheck running conditions if we cleared + # anything. + test "$mode" = "rw" && __stress_scrub_clean_scratch && continue + + timeout -s TERM "$remount_period" $FSSTRESS_PROG \ + $args $rw_arg >> $seqres.full + res=$? + echo "$mode fsstress exits with $res at $(date)" >> $seqres.full + if [ "$res" -ne 0 ] && [ "$res" -ne 124 ]; then + # Stop if fsstress returns error. Mask off + # the magic code 124 because that is how the + # timeout(1) program communicates that we ran + # out of time. + break; + fi + if [ "$mode" = "rw" ]; then + mode="ro" + rw_arg="-R" + else + mode="rw" + rw_arg="" + fi + + # Try remounting until we get the result we wanted + while ! 
_scratch_remount "$mode" &>/dev/null && \ + __stress_scrub_running "$end" "$runningfile"; do + sleep 0.2 + done + done + rm -f "$runningfile" + return 0 + fi + while __stress_scrub_running "$end" "$runningfile"; do # Need to recheck running conditions if we cleared anything __stress_scrub_clean_scratch && continue @@ -526,6 +602,13 @@ _scratch_xfs_stress_scrub_cleanup() { echo "Waiting for children to exit at $(date)" >> $seqres.full wait + # Ensure the scratch fs is also writable before we exit. + if [ -n "$__SCRUB_STRESS_REMOUNT_LOOP" ]; then + echo "Remounting rw at $(date)" >> $seqres.full + _scratch_remount rw >> $seqres.full 2>&1 + __SCRUB_STRESS_REMOUNT_LOOP="" + fi + echo "Cleanup finished at $(date)" >> $seqres.full } @@ -561,6 +644,9 @@ __stress_scrub_check_commands() { # in a separate loop. If zero -i options are specified, do not run. # Callers must check each of these commands (via _require_xfs_io_command) # before calling here. +# -r Run the exerciser (fsstress or fsx) for this long (a timeout(1) duration), +# then remount the fs ro or rw. The default is to run the exerciser +# continuously with no remount, unless XFS_SCRUB_STRESS_REMOUNT_PERIOD is set. # -s Pass this command to xfs_io to test scrub. If zero -s options are # specified, xfs_io will not be run. # -t Run online scrub against this file; $SCRATCH_MNT is the default. 
@@ -577,16 +663,19 @@ _scratch_xfs_stress_scrub() { local scrub_delay="${XFS_SCRUB_STRESS_DELAY:--1}" local exerciser="fsstress" local io_args=() + local remount_period="${XFS_SCRUB_STRESS_REMOUNT_PERIOD}" __SCRUB_STRESS_FREEZE_PID="" + __SCRUB_STRESS_REMOUNT_LOOP="" rm -f "$runningfile" touch "$runningfile" OPTIND=1 - while getopts "fi:s:t:w:X:" c; do + while getopts "fi:r:s:t:w:X:" c; do case "$c" in f) freeze=yes;; i) io_args+=("$OPTARG");; + r) remount_period="$OPTARG";; s) one_scrub_args+=("$OPTARG");; t) scrub_tgt="$OPTARG";; w) scrub_delay="$OPTARG";; @@ -611,7 +700,12 @@ _scratch_xfs_stress_scrub() { echo "Loop started at $(date --date="@${start}")," \ "ending at $(date --date="@${end}")" >> $seqres.full - "__stress_scrub_${exerciser}_loop" "$end" "$runningfile" & + if [ -n "$remount_period" ]; then + __SCRUB_STRESS_REMOUNT_LOOP="1" + fi + + "__stress_scrub_${exerciser}_loop" "$end" "$runningfile" \ + "$remount_period" & if [ -n "$freeze" ]; then __stress_scrub_freeze_loop "$end" "$runningfile" & diff --git a/ltp/fsstress.c b/ltp/fsstress.c index b395bc4da2..10608fb554 100644 --- a/ltp/fsstress.c +++ b/ltp/fsstress.c @@ -426,6 +426,7 @@ int symlink_path(const char *, pathname_t *); int truncate64_path(pathname_t *, off64_t); int unlink_path(pathname_t *); void usage(void); +void read_freq(void); void write_freq(void); void zero_freq(void); void non_btrfs_freq(const char *); @@ -472,7 +473,7 @@ int main(int argc, char **argv) xfs_error_injection_t err_inj; struct sigaction action; int loops = 1; - const char *allopts = "cd:e:f:i:l:m:M:n:o:p:rs:S:vVwx:X:zH"; + const char *allopts = "cd:e:f:i:l:m:M:n:o:p:rRs:S:vVwx:X:zH"; errrange = errtag = 0; umask(0); @@ -538,6 +539,9 @@ int main(int argc, char **argv) case 'r': namerand = 1; break; + case 'R': + read_freq(); + break; case 's': seed = strtoul(optarg, NULL, 0); break; @@ -1917,6 +1921,7 @@ usage(void) printf(" -o logfile specifies logfile name\n"); printf(" -p nproc specifies the no. 
of processes (default 1)\n"); printf(" -r specifies random name padding\n"); + printf(" -R zeros frequencies of write operations\n"); printf(" -s seed specifies the seed for the random generator (default random)\n"); printf(" -v specifies verbose mode\n"); printf(" -w zeros frequencies of non-write operations\n"); @@ -1928,6 +1933,17 @@ usage(void) printf(" -H prints usage and exits\n"); } +void +read_freq(void) +{ + opdesc_t *p; + + for (p = ops; p < ops_end; p++) { + if (p->iswrite) + p->freq = 0; + } +} + void write_freq(void) { diff --git a/tests/xfs/732 b/tests/xfs/732 new file mode 100755 index 0000000000..ed6fb3c977 --- /dev/null +++ b/tests/xfs/732 @@ -0,0 +1,38 @@ +#! /bin/bash +# SPDX-License-Identifier: GPL-2.0-or-later +# Copyright (c) 2022 Oracle. All Rights Reserved. +# +# FS QA Test No. 732 +# +# Race GETFSMAP and ro remount for a while to see if we crash or livelock. +# +. ./common/preamble +_begin_fstest auto quick fsmap remount + +# Override the default cleanup function. +_cleanup() +{ + cd / + _scratch_xfs_stress_scrub_cleanup + rm -rf $tmp.* +} + +# Import common functions. +. ./common/filter +. ./common/fuzzy +. ./common/xfs + +# real QA test starts here +_supported_fs xfs +_require_xfs_scratch_rmapbt +_require_xfs_io_command "fsmap" +_require_xfs_stress_scrub + +_scratch_mkfs > "$seqres.full" 2>&1 +_scratch_mount +_scratch_xfs_stress_scrub -r 5 -i 'fsmap -v' + +# success, all done +echo "Silence is golden" +status=0 +exit diff --git a/tests/xfs/732.out b/tests/xfs/732.out new file mode 100644 index 0000000000..451f82ce2d --- /dev/null +++ b/tests/xfs/732.out @@ -0,0 +1,2 @@ +QA output created by 732 +Silence is golden