[PATCH] fstests: add a generic test to verify direct IO writes with buffer contents change

[Date Prev] [Date Next] [Thread Prev] [Thread Next] [Date Index] [Thread Index]



There is a long existing btrfs problem that if some one is modifying the
buffer of an on-going direct IO write, it has a very high chance causing
permanent data checksum mismatch.

This is caused by the following factors:

- Direct IO buffer is out of the control of filesystem
  Thus user space can modify the contents during writeback.

- Btrfs generate its data checksum just before submitting the bio
  This means if the contents happens after the checksum is generated,
  the data written to disk will no longer match the checksum.

Btrfs later fixes the problem by forcing the direct IO to fallback to
buffered IO (if the inode requires data checksum), so that btrfs can
have a consistent view of the buffer.

This test case will verify the behavior by:

- Create a helper program 'dio-writeback-race'
  Which does direct IO writes block-by-block, and the buffer is always
  initialized to all 0xff before write,
  Then starting two threads:
  - One to submit the direct IO
  - One to modify the buffer to 0x00

  The program uses 4K as default block size, and 64MiB as the default
  file size.
  Which is more than enough to trigger tons of btrfs checksum errors
  on unpatched kernels.

- New test case generic/761
  Which will:

  * Use above program to create a 64MiB file

  * Do buffered read on that file
    Since above program is doing direct IO, there is no page cache
    populated.
    And the buffered read will need to read out all data from the disk,
    and if the filesystem supports data checksum, then the data checksum
    will also be verified against the data.

The test case passes on the following fses:
- ext4
- xfs
- btrfs with "nodatasum" mount option
  No data checksum to bother.

- btrfs with default "datasum" mount option and the fix "btrfs: always
  fallback to buffered write if the inode requires checksum"
  This makes btrfs to fallback on buffered IO so the contents won't
  change during writeback of page cache.

And fails on the following fses:

- btrfs with default "datasum" mount option and without the fix
  Expected.

Suggested-by: Christoph Hellwig <hch@xxxxxxxxxxxxx>
Signed-off-by: Qu Wenruo <wqu@xxxxxxxx>
---
 .gitignore               |   1 +
 src/Makefile             |   3 +-
 src/dio-writeback-race.c | 143 +++++++++++++++++++++++++++++++++++++++
 tests/generic/761        |  50 ++++++++++++++
 tests/generic/761.out    |   2 +
 5 files changed, 198 insertions(+), 1 deletion(-)
 create mode 100644 src/dio-writeback-race.c
 create mode 100755 tests/generic/761
 create mode 100644 tests/generic/761.out

diff --git a/.gitignore b/.gitignore
index efd47773..7060f52c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -210,6 +210,7 @@ tags
 /src/perf/*.pyc
 /src/fiemap-fault
 /src/min_dio_alignment
+/src/dio-writeback-race
 
 # Symlinked files
 /tests/generic/035.out
diff --git a/src/Makefile b/src/Makefile
index 1417c383..6ac72b36 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -20,7 +20,8 @@ TARGETS = dirstress fill fill2 getpagesize holes lstat64 \
 	t_get_file_time t_create_short_dirs t_create_long_dirs t_enospc \
 	t_mmap_writev_overlap checkpoint_journal mmap-rw-fault allocstale \
 	t_mmap_cow_memory_failure fake-dump-rootino dio-buf-fault rewinddir-test \
-	readdir-while-renames dio-append-buf-fault dio-write-fsync-same-fd
+	readdir-while-renames dio-append-buf-fault dio-write-fsync-same-fd \
+	dio-writeback-race
 
 LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize preallo_rw_pattern_reader \
 	preallo_rw_pattern_writer ftrunc trunc fs_perms testx looptest \
diff --git a/src/dio-writeback-race.c b/src/dio-writeback-race.c
new file mode 100644
index 00000000..01cce50b
--- /dev/null
+++ b/src/dio-writeback-race.c
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * dio_writeback_race -- test direct IO writes with the contents of
+ * the buffer changed during writeback.
+ *
+ * Copyright (C) 2025 SUSE Linux Products GmbH.
+ */
+
+/*
+ * dio_writeback_race
+ *
+ * Compile:
+ *
+ *   gcc -Wall -pthread -o dio_writeback_race dio_writeback_race.c
+ *
+ * Make sure filesystems with explicit data checksum to handle direct IO
+ * writes correctly, so that even the contents of the direct IO buffer changes
+ * during writeback, the fs should still work fine without data checksum mismatch.
+ * (Normally by falling back to buffer IO to keep the checksum and contents
+ *  consistent)
+ *
+ * Usage:
+ *
+ *   dio_writeback_race [-b <buffersize>] [-s <filesize>] <filename>
+ *
+ * Where:
+ *
+ *   <filename> is the name of the test file to create, if the file exists
+ *   it will be overwritten.
+ *
+ *   <buffersize> is the buffer size for direct IO. Users are responsible to
+ *   probe the correct direct IO size and alignment requirement.
+ *   If not specified, the default value will be PAGE_SIZE.
+ *
+ *   <filesize> is the total size of the test file. If not aligned to <blocksize>
+ *   the result file size will be rounded up to <blocksize>.
+ *   If not specified, the default value will be 64MiB.
+ */
+
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/stat.h>
+
+static int fd = -1;
+static void *buf = NULL;
+static unsigned long long filesize = 64 * 1024 * 1024;
+static int blocksize = 4096;
+
+const int orig_pattern = 0xff;
+const int modify_pattern = 0x00;
+
+static void *do_io(void *arg)
+{
+	int ret;
+
+	ret = write(fd, buf, blocksize);
+	pthread_exit((void *)(unsigned long long)ret);
+}
+
+static void *do_modify(void *arg)
+{
+	memset(buf, modify_pattern, blocksize);
+	pthread_exit(NULL);
+}
+
+int main (int argc, char *argv[])
+{
+	pthread_t io_thread;
+	pthread_t modify_thread;
+	unsigned long long filepos;
+	int opt;
+	int ret;
+
+	while ((opt = getopt(argc, argv, "b:s:")) > 0) {
+		switch (opt) {
+		case 'b':
+			blocksize = atoi(optarg);
+			if (blocksize == 0) {
+				fprintf(stderr, "invalid blocksize '%s'\n", optarg);
+				goto error;
+			}
+			break;
+		case 's':
+			filesize = atoll(optarg);
+			if (filesize == 0) {
+				fprintf(stderr, "invalid blocksize '%s'\n", optarg);
+				goto error;
+			}
+			break;
+		default:
+			fprintf(stderr, "unknown option '%c'\n", opt);
+			goto error;
+		}
+	}
+	if (optind >= argc) {
+		fprintf(stderr, "missing argument\n");
+		goto error;
+	}
+	ret = posix_memalign(&buf, blocksize, blocksize);
+	if (!buf) {
+		fprintf(stderr, "failed to allocate aligned memory\n");
+		exit(EXIT_FAILURE);
+	}
+	fd = open(argv[optind], O_DIRECT | O_WRONLY | O_CREAT);
+	if (fd < 0) {
+		fprintf(stderr, "failed to open file '%s': %m\n", argv[2]);
+		goto error;
+	}
+
+	for (filepos = 0; filepos < filesize; filepos += blocksize) {
+		void *retval = NULL;
+
+		memset(buf, orig_pattern, blocksize);
+		pthread_create(&io_thread, NULL, do_io, NULL);
+		pthread_create(&modify_thread, NULL, do_modify, NULL);
+		ret = pthread_join(io_thread, retval);
+		if (ret < 0) {
+			errno = -ret;
+			fprintf(stderr, "failed to join io thread: %m\n");
+			goto error;
+		}
+		if (retval) {
+			fprintf(stderr, "io thread failed\n");
+			goto error;
+		}
+		ret = pthread_join(modify_thread, NULL);
+		if (ret < 0) {
+			errno = -ret;
+			fprintf(stderr, "failed to join modify thread: %m\n");
+			goto error;
+		}
+	}
+error:
+	close(fd);
+	free(buf);
+	return EXIT_FAILURE;
+}
diff --git a/tests/generic/761 b/tests/generic/761
new file mode 100755
index 00000000..24f45cb0
--- /dev/null
+++ b/tests/generic/761
@@ -0,0 +1,50 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2025 SUSE Linux Products GmbH. All Rights Reserved.
+#
+# FS QA Test 761
+#
+# Making sure direct IO (O_DIRECT) writes won't cause any data checksum mismatch,
+# even if the contents of the buffer changes during writeback.
+#
+# This is mostly for filesystems with data checksum support, which should fallback
+# to buffer IO to avoid inconsistency.
+# For filesystems without data checksum support, nothing needs to be bothered.
+#
+
+. ./common/preamble
+_begin_fstest auto quick
+
+# Override the default cleanup function.
+# _cleanup()
+# {
+# 	cd /
+# 	rm -r -f $tmp.*
+# }
+
+# Import common functions.
+# . ./common/filter
+
+# Modify as appropriate.
+_require_scratch
+_require_odirect
+_require_test_program dio-writeback-race
+
+_scratch_mkfs > $seqres.full 2>&1
+_scratch_mount
+
+blocksize=$(_get_block_size $SCRATCH_MNT)
+filesize=$(( 64 * 1024 * 1024))
+
+echo "blocksize=$blocksize filesize=$filesize" >> $seqres.full
+
+$here/src/dio-writeback-race -b $blocksize -s $filesize $SCRATCH_MNT/foobar
+
+# Read out the file, which should trigger checksum verification
+cat $SCRATCH_MNT/foobar > /dev/null
+
+echo "Silence is golden"
+
+# success, all done
+status=0
+exit
diff --git a/tests/generic/761.out b/tests/generic/761.out
new file mode 100644
index 00000000..72ebba4c
--- /dev/null
+++ b/tests/generic/761.out
@@ -0,0 +1,2 @@
+QA output created by 761
+Silence is golden
-- 
2.47.1





[Index of Archives]     [Linux Filesystems Development]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux