Re: [PATCH] generic: test concurrent direct IO writes and fsync using same fd

[Date Prev] [Date Next] [Thread Prev] [Thread Next] [Date Index] [Thread Index]



On Fri, Aug 30, 2024 at 12:10:21AM +0100, fdmanana@xxxxxxxxxx wrote:
> From: Filipe Manana <fdmanana@xxxxxxxx>
> 
> Test that a program that has 2 threads using the same file descriptor and
> concurrently doing direct IO writes and fsync doesn't trigger any crash
> or deadlock.
> 
> This is motivated by a bug found in btrfs fixed by the following patch:
> 
>   "btrfs: fix race between direct IO write and fsync when using same fd"
> 
> Signed-off-by: Filipe Manana <fdmanana@xxxxxxxx>
> ---
>  .gitignore                    |   1 +
>  src/Makefile                  |   2 +-
>  src/dio-write-fsync-same-fd.c | 106 ++++++++++++++++++++++++++++++++++
>  tests/generic/363             |  30 ++++++++++
>  tests/generic/363.out         |   2 +
>  5 files changed, 140 insertions(+), 1 deletion(-)
>  create mode 100644 src/dio-write-fsync-same-fd.c
>  create mode 100755 tests/generic/363
>  create mode 100644 tests/generic/363.out
> 
> diff --git a/.gitignore b/.gitignore
> index 36083e9d..57519263 100644
> --- a/.gitignore
> +++ b/.gitignore
> @@ -76,6 +76,7 @@ tags
>  /src/dio-buf-fault
>  /src/dio-interleaved
>  /src/dio-invalidate-cache
> +/src/dio-write-fsync-same-fd
>  /src/dirhash_collide
>  /src/dirperf
>  /src/dirstress
> diff --git a/src/Makefile b/src/Makefile
> index b3da59a0..b9ad6b5f 100644
> --- a/src/Makefile
> +++ b/src/Makefile
> @@ -20,7 +20,7 @@ TARGETS = dirstress fill fill2 getpagesize holes lstat64 \
>  	t_get_file_time t_create_short_dirs t_create_long_dirs t_enospc \
>  	t_mmap_writev_overlap checkpoint_journal mmap-rw-fault allocstale \
>  	t_mmap_cow_memory_failure fake-dump-rootino dio-buf-fault rewinddir-test \
> -	readdir-while-renames dio-append-buf-fault
> +	readdir-while-renames dio-append-buf-fault dio-write-fsync-same-fd
>  
>  LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize preallo_rw_pattern_reader \
>  	preallo_rw_pattern_writer ftrunc trunc fs_perms testx looptest \
> diff --git a/src/dio-write-fsync-same-fd.c b/src/dio-write-fsync-same-fd.c
> new file mode 100644
> index 00000000..79472a9e
> --- /dev/null
> +++ b/src/dio-write-fsync-same-fd.c
> @@ -0,0 +1,106 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) 2024 SUSE Linux Products GmbH.  All Rights Reserved.
> + */
> +
> +/*
> + * Test two threads working with the same file descriptor, one doing direct IO
> + * writes into the file and the other just doing fsync calls. We want to verify
> + * that there are no crashes or deadlocks.
> + *
> + * This program never finishes, it starts two infinite loops to write and fsync
> + * the file. It's meant to be called with the 'timeout' program from coreutils.
> + */
> +
> +/* Get the O_DIRECT definition. */
> +#ifndef _GNU_SOURCE
> +#define _GNU_SOURCE
> +#endif
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <stdint.h>
> +#include <fcntl.h>
> +#include <errno.h>
> +#include <string.h>
> +#include <pthread.h>
> +
> +static int fd;
> +
> +static ssize_t do_write(int fd, const void *buf, size_t count, off_t offset)
> +{
> +        while (count > 0) {
> +		ssize_t ret;
> +
> +		ret = pwrite(fd, buf, count, offset);
> +		if (ret < 0) {
> +			if (errno == EINTR)
> +				continue;
> +			return ret;
> +		}
> +		count -= ret;
> +		buf += ret;
> +	}
> +	return 0;
> +}
> +
> +static void *fsync_loop(void *arg)
> +{
> +	while (1) {
> +		int ret;
> +
> +		ret = fsync(fd);
> +		if (ret != 0) {
> +			perror("Fsync failed");
> +			exit(6);
> +		}
> +	}
> +}
> +
> +int main(int argc, char *argv[])
> +{
> +	long pagesize;
> +	void *write_buf;
> +	pthread_t fsyncer;
> +	int ret;
> +
> +	if (argc != 2) {
> +		fprintf(stderr, "Use: %s <file path>\n", argv[0]);
> +		return 1;
> +	}
> +
> +	fd = open(argv[1], O_WRONLY | O_CREAT | O_TRUNC | O_DIRECT, 0666);
> +	if (fd == -1) {
> +		perror("Failed to open/create file");
> +		return 1;
> +	}
> +
> +	pagesize = sysconf(_SC_PAGE_SIZE);
> +	if (pagesize == -1) {
> +		perror("Failed to get page size");
> +		return 2;
> +	}
> +
> +	ret = posix_memalign(&write_buf, pagesize, pagesize);
> +	if (ret) {
> +		perror("Failed to allocate buffer");
> +		return 3;
> +	}
> +
> +	ret = pthread_create(&fsyncer, NULL, fsync_loop, NULL);
> +	if (ret != 0) {
> +		fprintf(stderr, "Failed to create writer thread: %d\n", ret);
> +		return 4;
> +	}
> +
> +	while (1) {
> +		ret = do_write(fd, write_buf, pagesize, 0);
> +		if (ret != 0) {
> +			perror("Write failed");
> +			exit(5);
> +		}
> +	}
> +
> +	return 0;
> +}
> diff --git a/tests/generic/363 b/tests/generic/363
> new file mode 100755
> index 00000000..21159e24
> --- /dev/null
> +++ b/tests/generic/363
> @@ -0,0 +1,30 @@
> +#! /bin/bash
> +# SPDX-License-Identifier: GPL-2.0
> +# Copyright (C) 2024 SUSE Linux Products GmbH. All Rights Reserved.
> +#
> +# FS QA Test 363
> +#
> +# Test that a program that has 2 threads using the same file descriptor and
> +# concurrently doing direct IO writes and fsync doesn't trigger any crash or
> +# deadlock.
> +#
> +. ./common/preamble
> +_begin_fstest auto quick
> +
> +_require_test
> +_require_odirect
> +_require_test_program dio-write-fsync-same-fd
> +_require_command "$TIMEOUT_PROG" timeout
> +
> +[ $FSTYP == "btrfs" ] && \
> +	_fixed_by_kernel_commit xxxxxxxxxxxx \
> +	"btrfs: fix race between direct IO write and fsync when using same fd"
> +
> +# On error the test program writes messages to stderr, causing a golden output
> +# mismatch and making the test fail.
> +$TIMEOUT_PROG 10s $here/src/dio-write-fsync-same-fd $TEST_DIR/dio-write-fsync-same-fd

Hi Filipe,

Thanks for this new test case. How reproducible is this test? I tried to run it on
a Linux v6.11-rc3+ kernel without the above fix, but the test passed. Does this
reproducer need some special test conditions?

  # ./check -s default generic/363
  SECTION       -- default
  FSTYP         -- btrfs
  PLATFORM      -- Linux/x86_64 dell-xxxxx-xx 6.11.0-0.rc3.20240814git6b0f8db921ab.32.fc42.x86_64 #1 SMP PREEMPT_DYNAMIC Wed Aug 14 16:46:57 UTC 2024
  MKFS_OPTIONS  -- /dev/sda6
  MOUNT_OPTIONS -- -o context=system_u:object_r:root_t:s0 /dev/sda6 /mnt/scratch

  generic/363 10s ...  10s
  Ran: generic/363
  Passed all 1 test

Thanks,
Zorro

> +
> +# success, all done
> +echo "Silence is golden"
> +status=0
> +exit
> diff --git a/tests/generic/363.out b/tests/generic/363.out
> new file mode 100644
> index 00000000..d03d2dc2
> --- /dev/null
> +++ b/tests/generic/363.out
> @@ -0,0 +1,2 @@
> +QA output created by 363
> +Silence is golden
> -- 
> 2.43.0
> 
> 





[Index of Archives]     [Linux Filesystems Development]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux