On Wed, Feb 22, 2023 at 11:30:20AM -0800, Boris Burkov wrote: > btrfs recently had a bug where a direct io partial write resulted in a > hole in the file. Add a new generic test which creates a 2MiB file, > mmaps it, touches the first byte, then does an O_DIRECT write of the > mmapped buffer into a new file. This should result in the mapped pages > being a mix of in and out of page cache and thus a partial write, for > filesystems using iomap and IOMAP_DIO_PARTIAL. > > Signed-off-by: Boris Burkov <boris@xxxxxx> > --- > .gitignore | 1 + > src/Makefile | 2 +- > src/dio-buf-fault.c | 83 +++++++++++++++++++++++++++++++++++++++++++ > tests/generic/708 | 48 +++++++++++++++++++++++++ > tests/generic/708.out | 2 ++ > 5 files changed, 135 insertions(+), 1 deletion(-) > create mode 100644 src/dio-buf-fault.c > create mode 100755 tests/generic/708 > create mode 100644 tests/generic/708.out > > diff --git a/.gitignore b/.gitignore > index cfff8f85..644290f0 100644 > --- a/.gitignore > +++ b/.gitignore > @@ -72,6 +72,7 @@ tags > /src/deduperace > /src/detached_mounts_propagation > /src/devzero > +/src/dio-buf-fault > /src/dio-interleaved > /src/dio-invalidate-cache > /src/dirhash_collide > diff --git a/src/Makefile b/src/Makefile > index a574f7bd..24cd4747 100644 > --- a/src/Makefile > +++ b/src/Makefile > @@ -19,7 +19,7 @@ TARGETS = dirstress fill fill2 getpagesize holes lstat64 \ > t_ofd_locks t_mmap_collision mmap-write-concurrent \ > t_get_file_time t_create_short_dirs t_create_long_dirs t_enospc \ > t_mmap_writev_overlap checkpoint_journal mmap-rw-fault allocstale \ > - t_mmap_cow_memory_failure fake-dump-rootino > + t_mmap_cow_memory_failure fake-dump-rootino dio-buf-fault > > LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize preallo_rw_pattern_reader \ > preallo_rw_pattern_writer ftrunc trunc fs_perms testx looptest \ > diff --git a/src/dio-buf-fault.c b/src/dio-buf-fault.c > new file mode 100644 > index 00000000..36ff6710 > --- /dev/null > +++ b/src/dio-buf-fault.c > @@ 
-0,0 +1,83 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * Copyright (c) 2023 Meta Platforms, Inc. All Rights Reserved. > + */ > + > +#ifndef _GNU_SOURCE > +#define _GNU_SOURCE /* to get definition of O_DIRECT flag. */ > +#endif > + > +#include <sys/mman.h> > +#include <sys/types.h> > +#include <sys/stat.h> > +#include <err.h> > +#include <errno.h> > +#include <fcntl.h> > +#include <stdio.h> > +#include <unistd.h> > + > +/* > + * mmap a source file, then do a direct write of that mmapped region to a > + * destination file. > + */ > + > +int prep_mmap_buffer(char *src_filename, int *fd, void **addr) > +{ > + struct stat st; > + int ret; > + > + *fd = open(src_filename, O_RDWR, 0666); > + if (*fd == -1) > + err(1, "failed to open %s", src_filename); > + > + ret = fstat(*fd, &st); > + if (ret) > + err(1, "failed to stat %d", *fd); > + > + *addr = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, *fd, 0); > + if (*addr == MAP_FAILED) > + err(1, "failed to mmap %d", *fd); > + > + return st.st_size; > +} > + > +int do_dio(char *dst_filename, void *buf, size_t sz) > +{ > + int fd; > + ssize_t ret; > + > + fd = open(dst_filename, O_CREAT | O_TRUNC | O_WRONLY | O_DIRECT, 0666); > + if (fd == -1) > + err(1, "failed to open %s", dst_filename); > + while (sz) { > + ret = write(fd, buf, sz); > + if (ret < 0) { > + if (errno == -EINTR) > + continue; > + else > + err(1, "failed to write %lu bytes to %d", sz, fd); > + } else if (ret == 0) { > + break; > + } > + buf += ret; > + sz -= ret; > + } > + return sz; > +} > + > +int main(int argc, char *argv[]) { > + size_t sz; > + int fd; > + void *buf = NULL; > + char c; > + > + if (argc != 3) > + errx(1, "no in and out file name arguments given"); > + sz = prep_mmap_buffer(argv[1], &fd, &buf); ^^ What's the fd for? I didn't see you use it in main function after this line. 
> + > + /* touch the first page of the mapping to bring it into cache */ > + c = ((char *)buf)[0]; > + printf("%u\n", c); > + > + do_dio(argv[2], buf, sz); > +} > diff --git a/tests/generic/708 b/tests/generic/708 > new file mode 100755 > index 00000000..ff2e162b > --- /dev/null > +++ b/tests/generic/708 > @@ -0,0 +1,48 @@ > +#! /bin/bash > +# SPDX-License-Identifier: GPL-2.0 > +# Copyright (c) 2023 Meta Platforms, Inc. All Rights Reserved. > +# > +# FS QA Test 708 > +# > +# Test iomap direct_io partial writes. > +# > +# Create a reasonably large file, then run a program which mmaps it, > +# touches the first page, then dio writes it to a second file. This > +# can result in a page fault reading from the mmapped dio write buffer and > +# thus the iompap direct_io partial write codepath. > +# > +. ./common/preamble > +_begin_fstest quick auto > +_fixed_by_kernel_commit XXXX 'btrfs: fix dio continue after short write due to buffer page fault' > + > +# Override the default cleanup function. > +_cleanup() > +{ > + cd / > + rm -r -f $tmp.* > + rm -f $TEST_DIR/dio-buf-fault.* > +} > + > +# Import common functions. > +. ./common/filter Do you use any filter functions in this case? > + > +# real QA test starts here > + > +# Modify as appropriate. ^^^ This comment can be removed. > +_supported_fs generic > +_require_test > +_require_odirect > +_require_test_program dio-buf-fault > +src=$TEST_DIR/dio-buf-fault.src > +dst=$TEST_DIR/dio-buf-fault.dst I prefer using $seq to reduce the possibility of duplicate file names in $TEST_DIR. E.g: src=$TEST_DIR/dio-buf-fault-${seq}.src dst=$TEST_DIR/dio-buf-fault-${seq}.dst > + > +echo "Silence is golden" > + Since $TEST_DIR isn't always clean, it is better to remove the $src and $dst files before running the test below, e.g.: rm -rf $src $dst BTW, if you'd like, you can remove the specific _cleanup() of this case. Since $src and $dst are not big, we can keep them in $TEST_DIR. 
Either way is fine with me; you can decide that yourself in the next version. > +$XFS_IO_PROG -fc "pwrite -q 0 $((2 * 1024 * 1024))" $src > +sync > +$here/src/dio-buf-fault $src $dst >> $seqres.full || _fail "failed doing the dio copy" > +diff $src $dst > + > +# success, all done > +status=$? > +exit > diff --git a/tests/generic/708.out b/tests/generic/708.out > new file mode 100644 > index 00000000..33c478ad > --- /dev/null > +++ b/tests/generic/708.out > @@ -0,0 +1,2 @@ > +QA output created by 708 > +Silence is golden > -- > 2.39.1 >