Re: [PATCH v2] generic: test data integrity with mixed buffer read and aio dio write

[Date Prev] [Date Next] [Thread Prev] [Thread Next] [Date Index] [Thread Index]



On Thu, Aug 17, 2017 at 11:31:45PM +0800, Zorro Lang wrote:
> When mixing buffered reads and asynchronous direct writes, it is
> possible to end up with the situation where we have stale data in
> the page cache while the new data is already written to disk.
> 
> Signed-off-by: Zorro Lang <zlang@xxxxxxxxxx>
> ---
> 
> Hi,
> 
> V2 did below changes:
> 
> On script:
> 1) End this case in limited time, not in limited loops.
> 2) Start up more reader processes, to reproduce this bug easier.
> 
> On C program
> 3) Do DIO read to check on-disk data, if find stale data from buffer reading.
> 4) alloc cmp_buf once in init_test() function, doesn't alloc it everytime.
> 
> Thanks,
> Zorro
> 
>  .gitignore                                |   1 +
>  src/aio-dio-regress/aio-dio-cycle-write.c | 285 ++++++++++++++++++++++++++++++
>  tests/generic/999                         |  86 +++++++++
>  tests/generic/999.out                     |   2 +
>  tests/generic/group                       |   1 +
>  5 files changed, 375 insertions(+)
>  create mode 100644 src/aio-dio-regress/aio-dio-cycle-write.c
>  create mode 100755 tests/generic/999
>  create mode 100644 tests/generic/999.out
> 
> diff --git a/.gitignore b/.gitignore
> index fcbc0cd4..28fe84d5 100644
> --- a/.gitignore
> +++ b/.gitignore
> @@ -133,6 +133,7 @@
>  /src/writemod
>  /src/writev_on_pagefault
>  /src/xfsctl
> +/src/aio-dio-regress/aio-dio-cycle-write
>  /src/aio-dio-regress/aio-dio-extend-stat
>  /src/aio-dio-regress/aio-dio-fcntl-race
>  /src/aio-dio-regress/aio-dio-hole-filling-race
> diff --git a/src/aio-dio-regress/aio-dio-cycle-write.c b/src/aio-dio-regress/aio-dio-cycle-write.c
> new file mode 100644
> index 00000000..94768381
> --- /dev/null
> +++ b/src/aio-dio-regress/aio-dio-cycle-write.c
> @@ -0,0 +1,285 @@
> +/*
> + * Directly AIO re-write a file with different content again and again.
> + * And check the data integrity.
> + *
> + * Copyright (C) 2017 Red Hat, Inc. All Rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
> + */
> +
> +#include <sys/stat.h>
> +#include <sys/types.h>
> +#include <errno.h>
> +#include <fcntl.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <ctype.h>
> +
> +#include <libaio.h>
> +
> +unsigned long buf_size = 0;
> +unsigned long loop_count = 0;
> +void *test_buf[2];
> +void *cmp_buf;
> +#define IO_PATTERN1	0x55
> +#define IO_PATTERN2	0xaa
> +
> +void usage(char *progname)
> +{
> +	fprintf(stderr, "usage: %s [-c loop_count] [-b bufsize] filename\n"
> +	        "\t-c loopcount: specify how many times to test"
> +	        "\t-b bufsize: keep writing from offset 0 to this size",
> +	        progname);
> +	exit(1);
> +}
> +
> +void dump_buffer(
> +	void	*buf,
> +	off64_t	offset,
> +	ssize_t	len)

This function declaration is different from other functions, better to
keep them consistent.

> +{
> +	int	i, j;
> +	char	*p;
> +	int	new;
> +
> +	for (i = 0, p = (char *)buf; i < len; i += 16) {
> +		char    *s = p;
> +
> +		if (i && !memcmp(p, p - 16, 16)) {
> +			new = 0;
> +		} else {
> +			if (i)
> +				fprintf(stderr, "*\n");
> +			new = 1;
> +		}
> +
> +		if (!new) {
> +			p += 16;
> +			continue;
> +		}
> +
> +		fprintf(stderr, "%08llx  ", (unsigned long long)offset + i);
> +		for (j = 0; j < 16 && i + j < len; j++, p++)
> +			fprintf(stderr, "%02x ", *p);
> +		fprintf(stderr, " ");
> +		for (j = 0; j < 16 && i + j < len; j++, s++) {
> +			if (isalnum((int)*s))
> +				fprintf(stderr, "%c", *s);
> +			else
> +				fprintf(stderr, ".");
> +		}
> +		fprintf(stderr, "\n");
> +

Extra new line can be removed.

> +	}
> +	fprintf(stderr, "%08llx\n", (unsigned long long)offset + i);
> +}
> +
> +int init_test(char *filename)
> +{
> +	int fd;
> +	int err = 0;
> +
> +	fd = open(filename, O_DIRECT | O_CREAT | O_RDWR, 0600);
> +	if (fd == -1) {
> +		perror("open");
> +		exit(1);
> +	}
> +
> +	ftruncate(fd, buf_size);
> +
> +	/* fill test_buf[0] with IO_PATTERN1 */
> +	err = posix_memalign(&(test_buf[0]), getpagesize(), buf_size);
> +	if (err) {
> +		fprintf(stderr, "error %s during %s\n",
> +			strerror(err),
> +			"posix_memalign");

This looks odd, why not:

printf(stderr, "error %s during posix_memalign\n", stderror(err));

> +		exit(1);
> +	}
> +	memset(test_buf[0], IO_PATTERN1, buf_size);
> +
> +	/* fill test_buf[1] with IO_PATTERN2 */
> +	err = posix_memalign(&(test_buf[1]), getpagesize(), buf_size);
> +	if (err) {
> +		fprintf(stderr, "error %s during %s\n",
> +			strerror(err),
> +			"posix_memalign");

Same here.

> +		exit(1);
> +	}
> +	memset(test_buf[1], IO_PATTERN2, buf_size);
> +
> +	/* fill test file with IO_PATTERN1 */
> +	if (pwrite(fd, test_buf[0], buf_size, 0) != buf_size) {
> +		perror("pwrite");
> +		exit(1);
> +	}
> +
> +	/* cmp_buf is used to compare with test_buf */
> +	err = posix_memalign(&cmp_buf, getpagesize(), buf_size);
> +	if (err) {
> +		fprintf(stderr, "error %s during %s\n",
> +			strerror(err),
> +			"posix_memalign");

Same here.

Given that this pattern repeats three times, factor out a function?

> +		exit(1);
> +	}
> +	memset(cmp_buf, 0, buf_size);
> +
> +	fsync(fd);
> +	close(fd);
> +
> +	return 0;
> +}
> +
> +/*
> + * Read file content back, then compare with the 'buf' which
> + * point to the original buffer was written to file.
> + */
> +int fs_check(char *filename, void *buf)

Rename it to "file_check"? fs_check leads me to fsck :)

> +{
> +

Extra new line.

> +	int fd;
> +	int rc = 0;
> +
> +	/* check with buffer read */
> +	fd = open(filename, O_RDONLY, 0600);
> +	if (fd == -1) {
> +		perror("open");
> +		exit(1);
> +	}
> +
> +	if (pread(fd, cmp_buf, buf_size, 0) != buf_size) {
> +		perror("pread");
> +		exit(1);
> +	}
> +
> +	if (memcmp(buf, cmp_buf, buf_size)) {
> +		fprintf(stderr, "get stale data from buffer read\n");
> +		dump_buffer(cmp_buf, 0, buf_size);
> +		rc++;
> +	}
> +
> +	/*
> +	 * check with direct-IO read, if buffer read find stale data. Make sure
> +	 * if the stale data is from caches or disk.
> +	 */
> +	if (rc != 0) {

I think we can check on-disk data unconditionally, even buffer read
reads correct content, it's better to make sure the on-disk content is
correct too.

> +		close(fd);
> +		fd = open(filename, O_DIRECT|O_RDONLY, 0600);
> +		if (fd < 0) {
> +			perror("open with direct-IO");
> +			exit(1);
> +		}
> +
> +		if (pread(fd, cmp_buf, buf_size, 0) != buf_size) {
> +			perror("pread");
> +			exit(1);
> +		}
> +
> +		if (memcmp(buf, cmp_buf, buf_size)) {
> +			fprintf(stderr, "get stale data from DIO read\n");
> +			dump_buffer(cmp_buf, 0, buf_size);
> +			rc++;
> +		}
> +	}
> +
> +	close(fd);
> +	return rc;
> +}
> +
> +int main(int argc, char *argv[])
> +{
> +	struct io_context *ctx = NULL;
> +	struct io_event evs[1];
> +	struct iocb iocb1;
> +	struct iocb *iocbs[] = { &iocb1 };
> +	int fd, err = 0;
> +	int i;
> +	int c;
> +	char *filename = NULL;
> +
> +	while ((c = getopt(argc, argv, "c:b:")) != -1) {
> +		char *endp;
> +
> +		switch (c) {
> +		case 'c':	/* the number of testing cycles */
> +			loop_count = strtol(optarg, &endp, 0);
> +			break;
> +		case 'b':	/* buffer size */
> +			buf_size = strtol(optarg, &endp, 0);
> +			break;
> +		default:
> +			usage(argv[0]);
> +		}
> +	}
> +
> +	if (loop_count == 0)
> +		loop_count = 1600;
> +	if (buf_size == 0)	/* default minimum buffer size is 65536 bytes */
> +		buf_size = 65536;
> +
> +	if (optind == argc - 1)
> +		filename = argv[optind];
> +	else
> +		usage(argv[0]);
> +
> +	init_test(filename);
> +
> +	err = io_setup(1, &ctx);
> +	if (err) {
> +		fprintf(stderr, "error %s during %s\n",
> +		        strerror(err),
> +		        "io_setup");
> +		return 1;
> +	}
> +
> +	i = 0;
> +	/*
> +	 * Keep running until loop_count times, fill the file with IO_PATTERN1
> +	 * or IO_PATTERN2 one by one, then read the file data back to check if
> +	 * there's stale data.
> +	 */
> +	while (loop_count--) {
> +		i++;
> +		i %= 2;
> +		fd = open(filename, O_DIRECT | O_CREAT | O_RDWR, 0600);
> +		if (fd == -1) {
> +			perror("open");
> +			return 1;
> +		}
> +
> +		io_prep_pwrite(&iocb1, fd, test_buf[i], buf_size, 0);
> +		err = io_submit(ctx, 1, iocbs);
> +		if (err != 1) {
> +			fprintf(stderr, "error %s during %s\n",
> +				strerror(err),
> +				"io_submit");
> +			return 1;
> +		}
> +		err = io_getevents(ctx, 1, 1, evs, NULL);
> +		if (err != 1) {
> +			fprintf(stderr, "error %s during %s\n",
> +				strerror(err),
> +				"io_getevents");
> +			return 1;
> +		}
> +		close(fd);
> +
> +		err = fs_check(filename, test_buf[i]);
> +		if (err != 0)
> +			break;
> +	}
> +
> +	return 0;
> +}
> diff --git a/tests/generic/999 b/tests/generic/999
> new file mode 100755
> index 00000000..edce8885
> --- /dev/null
> +++ b/tests/generic/999
> @@ -0,0 +1,86 @@
> +#! /bin/bash
> +# FS QA Test No. 999
> +#
> +# Test data integrity when mixing buffered reads and asynchronous
> +# direct writes a file.
> +#
> +#-----------------------------------------------------------------------
> +# Copyright (c) 2017 Red Hat Inc.  All Rights Reserved.
> +#
> +# This program is free software; you can redistribute it and/or
> +# modify it under the terms of the GNU General Public License as
> +# published by the Free Software Foundation.
> +#
> +# This program is distributed in the hope that it would be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with this program; if not, write the Free Software Foundation,
> +# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
> +#-----------------------------------------------------------------------
> +#
> +
> +seq=`basename $0`
> +seqres=$RESULT_DIR/$seq
> +echo "QA output created by $seq"
> +
> +here=`pwd`
> +tmp=/tmp/$$
> +status=1	# failure is the default!
> +trap "_cleanup; exit \$status" 0 1 2 3 15
> +
> +_cleanup()
> +{
> +	cd /
> +	rm -f $tmp.*
> +}
> +
> +# get standard environment, filters and checks
> +. ./common/rc
> +
> +# remove previous $seqres.full before test
> +rm -f $seqres.full
> +
> +# real QA test starts here
> +_supported_fs generic
> +_supported_os Linux
> +_require_test
> +_require_test_program "feature"
> +_require_aiodio aio-dio-cycle-write
> +
> +TESTFILE=$TEST_DIR/tst-aio-dio-cycle-write.$seq
> +FSIZE=655360	# bytes
> +
> +# More read processes can help to reproduce the bug easier, so run
> +# 2 ~ 20 readers according to the number of CPUs
> +nr_cpu=`$here/src/feature -o`
> +loops=$((nr_cpu / 2))
> +if [ $loops -lt 2 ]; then
> +	loops=2
> +elif [ $loops -gt 20 ]; then
> +	loops=20
> +fi
> +
> +# buffered reads the file frequently
> +for ((i=0; i<loops; i++)); do
> +	while true; do
> +		$XFS_IO_PROG -f -c "pread 0 $FSIZE" $TESTFILE >/dev/null 2>&1
> +	done &
> +	reader_pid="$reader_pid $!"
> +done
> +
> +# start an aio writer, which does writing loops internally and check
> +# data integrality.
> +# For reproduce the original bug, keep testing about 30s will be better,
> +# So let the AIO_TEST run as many loops as it can, then kill it in 20s.
> +timeout -s TERM 30s $AIO_TEST -c 999999 -b $FSIZE $TESTFILE >/dev/null

Not a big deal, but better to define a TIMEOUT_PROG and _require_command
in this test, timeout is not available on some old distributions.

> +
> +kill $reader_pid
> +wait $reader_pid

This only kills the 'while true; do ...; done' loop, it's still possible
that the xfs_io is still running and blocking umount.

One workaound would be like:

stop=$tmp.stop
rm -f $stop
for ((i=0; i<loops; i++)); do
	while [ ! -e $stop ]; done
		<xfs_io pread here>
	done &
done
<$AIO_TEST here>
touch $stop
wait $reader_pid

So when wait returns, we're sure all xfs_io processes are done.

Thanks,
Eryu

> +
> +echo "Silence is golden"
> +
> +status=0
> +exit
> diff --git a/tests/generic/999.out b/tests/generic/999.out
> new file mode 100644
> index 00000000..3b276ca8
> --- /dev/null
> +++ b/tests/generic/999.out
> @@ -0,0 +1,2 @@
> +QA output created by 999
> +Silence is golden
> diff --git a/tests/generic/group b/tests/generic/group
> index e13b5683..08462eee 100644
> --- a/tests/generic/group
> +++ b/tests/generic/group
> @@ -452,3 +452,4 @@
>  447 auto quick clone
>  448 auto quick rw
>  449 auto quick acl enospc
> +999 auto aio rw
> -- 
> 2.13.5
> 
> --
> To unsubscribe from this list: send the line "unsubscribe fstests" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe fstests" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux Filesystems Development]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux