Adds a test for a bug we encountered on Linux 6.4 on aarch64, where a race could mean that csums weren't getting written to the log tree, leading to corruption when it was replayed. The patches to detect log this tree corruption are in btrfs-progs 6.11. Signed-off-by: Mark Harmstone <maharmstone@xxxxxx> --- common/dmlogwrites | 24 ++++++++++++++++++ tests/btrfs/192 | 26 +------------------- tests/btrfs/333 | 59 +++++++++++++++++++++++++++++++++++++++++++++ tests/btrfs/333.out | 2 ++ 4 files changed, 86 insertions(+), 25 deletions(-) create mode 100755 tests/btrfs/333 create mode 100644 tests/btrfs/333.out diff --git a/common/dmlogwrites b/common/dmlogwrites index c1c85de9..f7faf244 100644 --- a/common/dmlogwrites +++ b/common/dmlogwrites @@ -203,3 +203,27 @@ _log_writes_replay_log_range() >> $seqres.full 2>&1 [ $? -ne 0 ] && _fail "replay failed" } + +# Replay and check each fua/flush (specified by $2) point. +# +# Since dm-log-writes records bio sequentially, even just replaying a range +# still needs to iterate all records before the end point. +# When number of records grows, it will be unacceptably slow, thus we need +# to use relay-log itself to trigger fsck, avoid unnecessary seek. +_log_writes_fast_replay_check() +{ + local check_point=$1 + local blkdev=$2 + local fsck_command=$3 + local ret + + [ -z "$check_point" -o -z "$blkdev" ] && _fail \ + "check_point and blkdev must be specified for log_writes_fast_replay_check" + + $here/src/log-writes/replay-log --log $LOGWRITES_DEV \ + --replay $blkdev --check $check_point --fsck "$fsck_command" \ + &> $tmp.full_fsck + ret=$? + tail -n 150 $tmp.full_fsck >> $seqres.full + [ $ret -ne 0 ] && _fail "fsck failed during replay" +} diff --git a/tests/btrfs/192 b/tests/btrfs/192 index f7fb65b8..449f0459 100755 --- a/tests/btrfs/192 +++ b/tests/btrfs/192 @@ -96,30 +96,6 @@ delete_workload() done } -# Replay and check each fua/flush (specified by $2) point. -# -# Since dm-log-writes records bio sequentially, even just replaying a range -# still needs to iterate all records before the end point. -# When number of records grows, it will be unacceptably slow, thus we need -# to use relay-log itself to trigger fsck, avoid unnecessary seek. -log_writes_fast_replay_check() -{ - local check_point=$1 - local blkdev=$2 - local fsck_command="$BTRFS_UTIL_PROG check $blkdev" - local ret - - [ -z "$check_point" -o -z "$blkdev" ] && _fail \ - "check_point and blkdev must be specified for log_writes_fast_replay_check" - - $here/src/log-writes/replay-log --log $LOGWRITES_DEV \ - --replay $blkdev --check $check_point --fsck "$fsck_command" \ - &> $tmp.full_fsck - ret=$? - tail -n 150 $tmp.full_fsck >> $seqres.full - [ $ret -ne 0 ] && _fail "fsck failed during replay" -} - xattr_value=$(printf '%0.sX' $(seq 1 3800)) # Bumping tree height to level 2. @@ -145,7 +121,7 @@ wait _log_writes_unmount _log_writes_remove -log_writes_fast_replay_check fua "$SCRATCH_DEV" +_log_writes_fast_replay_check fua "$SCRATCH_DEV" "$BTRFS_UTIL_PROG check $SCRATCH_DEV" echo "Silence is golden" diff --git a/tests/btrfs/333 b/tests/btrfs/333 new file mode 100755 index 00000000..13f113ca --- /dev/null +++ b/tests/btrfs/333 @@ -0,0 +1,59 @@ +#! /bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# FS QA Test 333 +# +# Test async dio with fsync to test a bug where a race meant that csums weren't +# getting written to the log tree, causing corruptions on remount. This can be +# seen on subpage FSes on 6.4. +# +. ./common/preamble +_begin_fstest auto quick metadata log volume + +_fixed_by_kernel_commit e917ff56c8e7 \ + "btrfs: determine synchronous writers from bio or writeback control" + +fio_config=$tmp.fio + +. ./common/dmlogwrites + +_require_scratch +_require_log_writes + +cat >$fio_config <<EOF +[global] +iodepth=128 +direct=1 +ioengine=libaio +rw=randwrite +runtime=1s +[job0] +rw=randwrite +filename=$SCRATCH_MNT/file +size=1g +fdatasync=1 +EOF + +_require_fio $fio_config + +cat $fio_config >> $seqres.full + +_log_writes_init $SCRATCH_DEV +_log_writes_mkfs >> $seqres.full 2>&1 +_log_writes_mark mkfs + +_log_writes_mount + +$FIO_PROG $fio_config > /dev/null 2>&1 +_log_writes_unmount + +_log_writes_remove +_log_writes_replay_log mkfs $SCRATCH_DEV + +_log_writes_fast_replay_check fua "$SCRATCH_DEV" "$BTRFS_UTIL_PROG check $SCRATCH_DEV" + +echo "Silence is golden" + +# success, all done +status=0 +exit diff --git a/tests/btrfs/333.out b/tests/btrfs/333.out new file mode 100644 index 00000000..60a15898 --- /dev/null +++ b/tests/btrfs/333.out @@ -0,0 +1,2 @@ +QA output created by 333 +Silence is golden -- 2.44.2