On Wed, Nov 02, 2022 at 03:36:21PM -0700, Darrick J. Wong wrote: > From: Darrick J. Wong <djwong@xxxxxxxxxx> > > Add new helpers to dmerror to provide for marking selected ranges > totally bad -- both reads and writes will fail. Create a new test for > xfs_scrub to check that it reports media errors in data files correctly. > > Signed-off-by: Darrick J. Wong <djwong@xxxxxxxxxx> > --- > common/dmerror | 140 ++++++++++++++++++++++++++++++++++++++++++++++++ > common/xfs | 9 +++ > tests/xfs/747 | 155 +++++++++++++++++++++++++++++++++++++++++++++++++++++ > tests/xfs/747.out | 12 ++++ > 4 files changed, 316 insertions(+) > create mode 100755 tests/xfs/747 > create mode 100644 tests/xfs/747.out > > > diff --git a/common/dmerror b/common/dmerror > index 54122b12ea..ca71fc11b4 100644 > --- a/common/dmerror > +++ b/common/dmerror > @@ -250,3 +250,143 @@ _dmerror_load_working_table() > [ $load_res -ne 0 ] && _fail "dmsetup failed to load error table" > [ $resume_res -ne 0 ] && _fail "dmsetup resume failed" > } > + > +# Given a list of (start, length) tuples on stdin, combine adjacent tuples into > +# larger ones and write the new list to stdout. > +__dmerror_combine_extents() > +{ > + awk 'BEGIN{start = 0; len = 0;}{ > +if (start + len == $1) { > + len += $2; > +} else { > + if (len > 0) > + printf("%d %d\n", start, len); > + start = $1; > + len = $2; > +} > +} END { > + if (len > 0) > + printf("%d %d\n", start, len); > +}' > +} > + > +# Given a block device, the name of a preferred dm target, the name of an > +# implied dm target, and a list of (start, len) tuples on stdin, create a new > +# dm table which maps each of the tuples to the preferred target and all other > +# areas to the implied dm target. 
> +__dmerror_recreate_map() > +{ > + local device="$1" > + local preferred_tgt="$2" > + local implied_tgt="$3" > + local size=$(blockdev --getsz "$device") > + > + local awk_program=' > + BEGIN { > + implied_start = 0; > + } > + { > + extent_start = $1; > + extent_len = $2; > + > + if (extent_start > size) { > + extent_start = size; > + extent_len = 0; > + } else if (extent_start + extent_len > size) { > + extent_len = size - extent_start; > + } > + > + if (implied_start < extent_start) > + printf("%d %d %s %s %d\n", implied_start, > + extent_start - implied_start, > + implied_tgt, device, implied_start); > + printf("%d %d %s %s %d\n", extent_start, extent_len, > + preferred_tgt, device, extent_start); > + implied_start = extent_start + extent_len; > + } > + END { > + if (implied_start < size) > + printf("%d %d %s %s %d\n", implied_start, > + size - implied_start, implied_tgt, > + device, implied_start); > + }' > + > + awk -v device="$device" -v size=$size -v implied_tgt="$implied_tgt" \ > + -v preferred_tgt="$preferred_tgt" "$awk_program" Hi, OK, if you prefer to do it this way, should __dmerror_combine_extents be written this way too? :) > +} > + > +# Update the dm error table so that the range (start, len) maps to the > +# preferred dm target, overriding anything that maps to the implied dm target. > +# This assumes that the only desired targets for this dm device are the > +# preferred and implied targets. The fifth argument is the scratch device > +# that we want to change the table for. 
> +__dmerror_change() > +{ > + local start="$1" > + local len="$2" > + local preferred_tgt="$3" > + local implied_tgt="$4" > + local whichdev="$5" > + local old_table > + local new_table > + > + case "$whichdev" in > + "SCRATCH_DEV"|"") whichdev="$SCRATCH_DEV";; > + "SCRATCH_LOGDEV"|"LOG") whichdev="$NON_ERROR_LOGDEV";; > + "SCRATCH_RTDEV"|"RT") whichdev="$NON_ERROR_RTDEV";; > + esac > + > + case "$whichdev" in > + "$SCRATCH_DEV") old_table="$DMERROR_TABLE";; > + "$NON_ERROR_LOGDEV") old_table="$DMERROR_LOGTABLE";; > + "$NON_ERROR_RTDEV") old_table="$DMERROR_RTTABLE";; > + *) > + echo "$whichdev: Unknown dmerror device." > + return > + ;; > + esac > + > + new_table="$( (echo "$old_table"; echo "$start $len $preferred_tgt") | \ > + awk -v type="$preferred_tgt" '{if ($3 == type) print $0;}' | \ > + sort -g | \ > + __dmerror_combine_extents | \ > + __dmerror_recreate_map "$whichdev" "$preferred_tgt" \ > + "$implied_tgt" )" > + > + case "$whichdev" in > + "$SCRATCH_DEV") DMERROR_TABLE="$new_table";; > + "$NON_ERROR_LOGDEV") DMERROR_LOGTABLE="$new_table";; > + "$NON_ERROR_RTDEV") DMERROR_RTTABLE="$new_table";; > + esac > +} > + > +# Reset the dm error table to everything ok. The dm device itself must be > +# remapped by calling _dmerror_load_error_table. > +_dmerror_reset_table() > +{ > + DMERROR_TABLE="$DMLINEAR_TABLE" > + DMERROR_LOGTABLE="$DMLINEAR_LOGTABLE" > + DMERROR_RTTABLE="$DMLINEAR_RTTABLE" > +} > + > +# Update the dm error table so that IOs to the given range will return EIO. > +# The dm device itself must be remapped by calling _dmerror_load_error_table. > +_dmerror_mark_range_bad() > +{ > + local start="$1" > + local len="$2" > + local dev="$3" > + > + __dmerror_change "$start" "$len" error linear "$dev" > +} > + > +# Update the dm error table so that IOs to the given range will succeed. > +# The dm device itself must be remapped by calling _dmerror_load_error_table. 
> +_dmerror_mark_range_good() > +{ > + local start="$1" > + local len="$2" > + local dev="$3" > + > + __dmerror_change "$start" "$len" linear error "$dev" > +} > diff --git a/common/xfs b/common/xfs > index 8ac1964e9c..f466d2c42f 100644 > --- a/common/xfs > +++ b/common/xfs > @@ -218,6 +218,15 @@ _xfs_get_dir_blocksize() > $XFS_INFO_PROG "$fs" | sed -n "s/^naming.*bsize=\([[:digit:]]*\).*/\1/p" > } > > +# Decide if this path is a file on the realtime device > +_xfs_is_realtime_file() > +{ > + if [ "$USE_EXTERNAL" != "yes" ] || [ -z "$SCRATCH_RTDEV" ]; then > + return 1 > + fi > + $XFS_IO_PROG -c 'stat -v' "$1" | grep -q -w realtime > +} > + > # Set or clear the realtime status of every supplied path. The first argument > # is either 'data' or 'realtime'. All other arguments should be paths to > # existing directories or empty regular files. > diff --git a/tests/xfs/747 b/tests/xfs/747 > new file mode 100755 > index 0000000000..8b828bc48d > --- /dev/null > +++ b/tests/xfs/747 > @@ -0,0 +1,155 @@ > +#! /bin/bash > +# SPDX-License-Identifier: GPL-2.0-or-later > +# Copyright (c) 2022 Oracle. All Rights Reserved. > +# > +# FS QA Test No. 747 > +# > +# Check xfs_scrub's media scan can actually return diagnostic information for > +# media errors in file data extents. > + > +. ./common/preamble > +_begin_fstest auto quick scrub Do we need "eio" here? The rest looks good to me. Thanks, Zorro > + > +# Override the default cleanup function. > +_cleanup() > +{ > + cd / > + rm -f $tmp.* > + _dmerror_cleanup > +} > + > +# Import common functions. > +. ./common/fuzzy > +. ./common/filter > +. 
./common/dmerror > + > +# real QA test starts here > +_supported_fs xfs > +_require_dm_target error > +_require_scratch > +_require_scratch_xfs_crc > +_require_scrub > + > +filter_scrub_errors() { > + _filter_scratch | sed \ > + -e "s/offset $((fs_blksz * 2)) /offset 2FSB /g" \ > + -e "s/length $fs_blksz.*/length 1FSB./g" > +} > + > +_scratch_mkfs >> $seqres.full > +_dmerror_init > +_dmerror_mount >> $seqres.full 2>&1 > + > +_supports_xfs_scrub $SCRATCH_MNT $SCRATCH_DEV || _notrun "Scrub not supported" > + > +# Write a file with 4 file blocks worth of data > +victim=$SCRATCH_MNT/a > +file_blksz=$(_get_file_block_size $SCRATCH_MNT) > +$XFS_IO_PROG -f -c "pwrite -S 0x58 0 $((4 * file_blksz))" -c "fsync" $victim >> $seqres.full > +unset errordev > +_xfs_is_realtime_file $victim && errordev="RT" > +bmap_str="$($XFS_IO_PROG -c "bmap -elpv" $victim | grep "^[[:space:]]*0:")" > +echo "$errordev:$bmap_str" >> $seqres.full > + > +phys="$(echo "$bmap_str" | $AWK_PROG '{print $3}')" > +if [ "$errordev" = "RT" ]; then > + len="$(echo "$bmap_str" | $AWK_PROG '{print $4}')" > +else > + len="$(echo "$bmap_str" | $AWK_PROG '{print $6}')" > +fi > +fs_blksz=$(_get_block_size $SCRATCH_MNT) > +echo "file_blksz:$file_blksz:fs_blksz:$fs_blksz" >> $seqres.full > +kernel_sectors_per_fs_block=$((fs_blksz / 512)) > + > +# Did we get at least 4 fs blocks worth of extent? > +min_len_sectors=$(( 4 * kernel_sectors_per_fs_block )) > +test "$len" -lt $min_len_sectors && \ > + _fail "could not format a long enough extent on an empty fs??" > + > +phys_start=$(echo "$phys" | sed -e 's/\.\..*//g') > + > +echo "$errordev:$phys:$len:$fs_blksz:$phys_start" >> $seqres.full > +echo "victim file:" >> $seqres.full > +od -tx1 -Ad -c $victim >> $seqres.full > + > +# Set the dmerror table so that all IO will pass through. 
> +_dmerror_reset_table > + > +cat >> $seqres.full << ENDL > +dmerror before: > +$DMERROR_TABLE > +$DMERROR_RTTABLE > +<end table> > +ENDL > + > +# All sector numbers that we feed to the kernel must be in units of 512b, but > +# they also must be aligned to the device's logical block size. > +logical_block_size=$(_min_dio_alignment $SCRATCH_DEV) > +kernel_sectors_per_device_lba=$((logical_block_size / 512)) > + > +# Mark as bad one of the device LBAs in the middle of the extent. Target the > +# second LBA of the third block of the four-block file extent that we allocated > +# earlier, but without overflowing into the fourth file block. > +bad_sector=$(( phys_start + (2 * kernel_sectors_per_fs_block) )) > +bad_len=$kernel_sectors_per_device_lba > +if (( kernel_sectors_per_device_lba < kernel_sectors_per_fs_block )); then > + bad_sector=$((bad_sector + kernel_sectors_per_device_lba)) > +fi > +if (( (bad_sector % kernel_sectors_per_device_lba) != 0)); then > + echo "bad_sector $bad_sector not congruent with device logical block size $logical_block_size" > +fi > +_dmerror_mark_range_bad $bad_sector $bad_len $errordev > + > +cat >> $seqres.full << ENDL > +dmerror after marking bad: > +$DMERROR_TABLE > +$DMERROR_RTTABLE > +<end table> > +ENDL > + > +_dmerror_load_error_table > + > +# See if the media scan picks it up. 
> +echo "Scrub for injected media error (single threaded)" > + > +# Once in single-threaded mode > +_scratch_scrub -b -x >> $seqres.full 2> $tmp.error > +cat $tmp.error | filter_scrub_errors > + > +# Once in parallel mode > +echo "Scrub for injected media error (multi threaded)" > +_scratch_scrub -x >> $seqres.full 2> $tmp.error > +cat $tmp.error | filter_scrub_errors > + > +# Remount to flush the page cache and reread to see the IO error > +_dmerror_unmount > +_dmerror_mount > +echo "victim file:" >> $seqres.full > +od -tx1 -Ad -c $victim >> $seqres.full 2> $tmp.error > +cat $tmp.error | sed -e 's/read error: //g' | _filter_scratch > + > +# Scrub again to re-confirm the media error across a remount > +echo "Scrub for injected media error (after remount)" > +_scratch_scrub -x >> $seqres.full 2> $tmp.error > +cat $tmp.error | filter_scrub_errors > + > +# Now mark the bad range good so that a retest shows no media failure. > +_dmerror_mark_range_good $bad_sector $bad_len $errordev > +_dmerror_load_error_table > + > +cat >> $seqres.full << ENDL > +dmerror after marking good: > +$DMERROR_TABLE > +$DMERROR_RTTABLE > +<end table> > +ENDL > + > +echo "Scrub after removing injected media error" > + > +# Scrub one last time to make sure the error's gone. > +_scratch_scrub -x >> $seqres.full 2> $tmp.error > +cat $tmp.error | filter_scrub_errors > + > +# success, all done > +status=0 > +exit > diff --git a/tests/xfs/747.out b/tests/xfs/747.out > new file mode 100644 > index 0000000000..714ceb2e56 > --- /dev/null > +++ b/tests/xfs/747.out > @@ -0,0 +1,12 @@ > +QA output created by 747 > +Scrub for injected media error (single threaded) > +Unfixable Error: SCRATCH_MNT/a: media error at data offset 2FSB length 1FSB. > +SCRATCH_MNT: unfixable errors found: 1 > +Scrub for injected media error (multi threaded) > +Unfixable Error: SCRATCH_MNT/a: media error at data offset 2FSB length 1FSB. 
> +SCRATCH_MNT: unfixable errors found: 1 > +od: SCRATCH_MNT/a: Input/output error > +Scrub for injected media error (after remount) > +Unfixable Error: SCRATCH_MNT/a: media error at data offset 2FSB length 1FSB. > +SCRATCH_MNT: unfixable errors found: 1 > +Scrub after removing injected media error >