[PATCH] Add new tests/generic/536: intermittent I/O errors must not corrupt a filesystem

Edwin Török <edvin.torok@xxxxxxxxxx> · Thu, 21 Mar 2019 10:30:46 +0000

Based on tests/generic/347.

In our lab we've found that if multiple iSCSI connection errors are
detected (without completely losing the iSCSI connection) then the GFS2
filesystem becomes corrupt, due to a difference between the filesystem and
device block sizes.
Add a test that explicitly checks for this by simulating I/O errors
deterministically with dm-thin.
Changing the block size to 512 would make this test pass on GFS2 as well.

This test causes I/O errors on purpose, so it is expected that a filesystem
might withdraw itself or remount read-only.
However, when the filesystem is mounted again it should be usable and free of
corruption (i.e. it must not immediately withdraw or unmount itself again).

Tested that this passes on ext4 and fails on GFS2.

CC: Mark Syms <Mark.Syms@xxxxxxxxxx>
CC: Tim Smith <Tim.Smith@xxxxxxxxxx>
CC: Ross Lagerwall <Ross.Lagerwall@xxxxxxxxxx>
Signed-off-by: Edwin Török <edvin.torok@xxxxxxxxxx>
---
 common/dmthin         |  6 ++++
 tests/generic/536     | 80 +++++++++++++++++++++++++++++++++++++++++++
 tests/generic/536.out |  2 ++
 tests/generic/group   |  1 +
 4 files changed, 89 insertions(+)
 create mode 100755 tests/generic/536
 create mode 100644 tests/generic/536.out

diff --git a/common/dmthin b/common/dmthin
index 7946e9a7..28f2ef9c 100644
--- a/common/dmthin
+++ b/common/dmthin
@@ -41,6 +41,12 @@ _dmthin_check_fs()
 	_check_scratch_fs $DMTHIN_VOL_DEV
 }
 
+_dmthin_cycle_mount()	# unmount $SCRATCH_MNT, then mount again via _dmthin_mount
+{
+	$UMOUNT_PROG $SCRATCH_MNT > /dev/null 2>&1	# output discarded, exit status not checked
+	_dmthin_mount
+}
+
 # Set up a dm-thin device on $SCRATCH_DEV
 #
 # All arguments are optional, and in this order; defaults follows:
diff --git a/tests/generic/536 b/tests/generic/536
new file mode 100755
index 00000000..0e8d137e
--- /dev/null
+++ b/tests/generic/536
@@ -0,0 +1,80 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2015 Red Hat, Inc.  All Rights Reserved.
+#
+# FS QA Test No. generic/536
+#
+# Test that intermittent IO errors during pwrite do not cause filesystem corruption
+
+seq=`basename $0`	# test name, taken from this script's file name
+seqres=$RESULT_DIR/$seq
+echo "QA output created by $seq"
+
+here=`pwd`	# directory the test was started from
+tmp=/tmp/$$	# per-process prefix for temporary files; _cleanup removes $tmp.*
+
+status=1	# failure is the default; only set to 0 at the very end
+trap "_cleanup; exit \$status" 0 1 2 3 15	# on exit or signals 1/2/3/15: clean up, then exit with $status
+
+BACKING_SIZE=$((500 * 1024 * 1024 / 512))	# 500M, expressed in 512-byte units
+VIRTUAL_SIZE=$((10 * $BACKING_SIZE))		# 5000M, ten times the backing size
+GROW_SIZE=$((100 * 1024 * 1024 / 512))		# 100M, passed to _dmthin_grow in _workout
+
+_cleanup()	# run by the trap on exit and on signals 1, 2, 3 and 15
+{
+	_dmthin_cleanup	# presumably tears down the dm-thin setup -- defined outside this file, confirm in common/dmthin
+	rm -f $tmp.*	# remove any files created under the $tmp prefix
+}
+
+_setup_thin()	# prepare the thin volume, make a filesystem on it and mount it
+{
+	_dmthin_init $BACKING_SIZE $VIRTUAL_SIZE	# 500M backing size, 5000M virtual size
+	_dmthin_set_queue	# NOTE(review): presumably selects the pool's out-of-space behaviour -- confirm in common/dmthin
+	_mkfs_dev $DMTHIN_VOL_DEV	# filesystem goes on the thin volume device
+	_dmthin_mount
+}
+
+_workout()
+{
+	# Write 500 files of 1M each: slightly more than the 500M backing size
+	for I in {1..500}; do
+		$XFS_IO_PROG -f -c "pwrite -W 0 1M" $SCRATCH_MNT/file$I >/dev/null 2>&1
+	done
+
+	sync
+
+	_dmthin_grow $GROW_SIZE
+
+	# After growing by $GROW_SIZE, write ten more 1M files; this must not fill it
+	for I in {501..510}; do
+		$XFS_IO_PROG -f -c "pwrite 0 1M" $SCRATCH_MNT/file$I >/dev/null 2>&1
+	done
+}
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/dmthin
+
+_supported_fs generic
+_supported_os Linux
+_require_scratch_nocheck
+_require_dm_target thin-pool
+
+_setup_thin
+
+# Trigger IO errors. The filesystem may be remounted read-only or withdrawn; this is expected.
+_workout
+
+# Now remount the filesystem without triggering IO errors,
+# and check that the filesystem is not corrupt.
+_dmthin_cycle_mount
+# ls --color makes ls stat each file, which finds the corruption; the listing is run three times
+ls --color=always $SCRATCH_MNT/ >/dev/null || _fail "Failed to list filesystem after remount"
+ls --color=always $SCRATCH_MNT/ >/dev/null || _fail "Failed to list filesystem after remount"
+ls --color=always $SCRATCH_MNT/ >/dev/null || _fail "Failed to list filesystem after remount"
+_dmthin_cleanup	# explicit teardown; _cleanup calls this again from the exit trap
+
+echo "=== completed"
+
+status=0	# reached only if every check above passed
+exit	# the exit trap runs _cleanup and exits with $status
diff --git a/tests/generic/536.out b/tests/generic/536.out
new file mode 100644
index 00000000..5140d261
--- /dev/null
+++ b/tests/generic/536.out
@@ -0,0 +1,2 @@
+QA output created by 536
+=== completed
diff --git a/tests/generic/group b/tests/generic/group
index 78b9b45d..a346dfab 100644
--- a/tests/generic/group
+++ b/tests/generic/group
@@ -538,3 +538,4 @@
 533 auto quick attr
 534 auto quick log
 535 auto quick log
+536 auto quick rw thin
-- 
2.19.1