Re: [PATCH v2] generic: unaligned direct AIO write test

[Date Prev] [Date Next] [Thread Prev] [Thread Next] [Date Index] [Thread Index]



On Tue, Mar 19, 2019 at 09:49:51PM +0800, Zorro Lang wrote:
> A simply reproducer from Frank Sorenson:
> 
>   ftruncate(fd, 65012224)
>   io_prep_pwrite(iocbs[0], fd, buf[0], 1048576, 63963648);
>   io_prep_pwrite(iocbs[1], fd, buf[1], 1048576, 65012224);
> 
>   io_submit(io_ctx, 1, &iocbs[0]);
>   io_submit(io_ctx, 1, &iocbs[1]);
> 
>   io_getevents(io_ctx, 2, 2, events, NULL)
> 
> help to find an ext4 corruption:
>            **************** **************** ****************
>            *    page 1    * *    page 2    * *    page 3    *
>            **************** **************** ****************
>   existing 0000000000000000 0000000000000000 0000000000000000
>   write 1    AAAAAAAAAAAAAA AA
>   write 2                     BBBBBBBBBBBBBB BB
> 
>   result   00AAAAAAAAAAAAAA 00BBBBBBBBBBBBBB BB00000000000000
>   desired  00AAAAAAAAAAAAAA AABBBBBBBBBBBBBB BB00000000000000
> 
> This issue remind us we might miss unaligned AIO test for long time.
> We thought fsx cover this part, but looks like it's not. So this case
> trys to cover unaligned direct AIO write test on file with different
> initial truncate i_size.

I think it looks good and I do like how you've done the new
aio-dio-write-verify tool.

Reviewed-by: Lukas Czerner <lczerner@xxxxxxxxxx>

Thanks!
-Lukas

> 
> Signed-off-by: Zorro Lang <zlang@xxxxxxxxxx>
> ---
> 
> V2 re-write the .c program totally and changed the generic/999.
> 
> Thanks,
> Zorro
> 
>  .gitignore                                 |   1 +
>  src/aio-dio-regress/aio-dio-write-verify.c | 338 +++++++++++++++++++++
>  tests/generic/999                          |  96 ++++++
>  tests/generic/999.out                      |   2 +
>  tests/generic/group                        |   1 +
>  5 files changed, 438 insertions(+)
>  create mode 100644 src/aio-dio-regress/aio-dio-write-verify.c
>  create mode 100755 tests/generic/999
>  create mode 100644 tests/generic/999.out
> 
> diff --git a/.gitignore b/.gitignore
> index 6454e4d6..c13fb713 100644
> --- a/.gitignore
> +++ b/.gitignore
> @@ -162,6 +162,7 @@
>  /src/aio-dio-regress/aio-dio-invalidate-failure
>  /src/aio-dio-regress/aio-dio-invalidate-readahead
>  /src/aio-dio-regress/aio-dio-subblock-eof-read
> +/src/aio-dio-regress/aio-dio-write-verify
>  /src/aio-dio-regress/aio-free-ring-with-bogus-nr-pages
>  /src/aio-dio-regress/aio-io-setup-with-nonwritable-context-pointer
>  /src/aio-dio-regress/aio-last-ref-held-by-io
> diff --git a/src/aio-dio-regress/aio-dio-write-verify.c b/src/aio-dio-regress/aio-dio-write-verify.c
> new file mode 100644
> index 00000000..823410d0
> --- /dev/null
> +++ b/src/aio-dio-regress/aio-dio-write-verify.c
> @@ -0,0 +1,338 @@
> +// SPDX-License-Identifier: GPL-2.0+
> +/*
> + * Copyright (C) 2019 Red Hat, Inc. All Rights reserved.
> + *
> + * This program is used to do AIO write test. Each <-a size=N,off=M> field
> + * specifies an AIO write. More this kind of fields will be combined to a
> + * group of AIO write, then io_submit them together. All AIO write field can
> + * overlap or be sparse.
> + *
> + * After all AIO write operations done, each [size, off] content will be read
> + * back, verify if there's corruption.
> + *
> + * Before doing a series of AIO write, an optional ftruncate operation can be
> + * chosen. To truncate the file i_size to a specified location for testing.
> + *
> + */
> +#include <sys/stat.h>
> +#include <sys/types.h>
> +#include <errno.h>
> +#include <fcntl.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <ctype.h>
> +
> +#include <libaio.h>
> +
> +#define MAX_EVENT_NUM	128
> +#define IO_PATTERN	0x5a
> +
> +void
> +usage(char *progname)
> +{
> +	fprintf(stderr, "usage: %s [-t truncsize ] <-a size=N,off=M [-a ...]>  filename\n"
> +	        "\t-t truncsize: truncate the file to a special size before AIO wirte\n"
> +	        "\t-a: specify once AIO write size and startoff, this option can be specified many times, but less than 128\n"
> +	        "\t\tsize=N: AIO write size\n"
> +	        "\t\toff=M:  AIO write startoff\n"
> +	        "e.g: %s -t 4608 -a size=4096,off=512 -a size=4096,off=4608 filename\n",
> +	        progname, progname);
> +	exit(1);
> +}
> +
> +void
> +dump_buffer(
> +	void	*buf,
> +	off64_t	offset,
> +	ssize_t	len)
> +{
> +	int	i, j;
> +	char	*p;
> +	int	new;
> +
> +	for (i = 0, p = (char *)buf; i < len; i += 16) {
> +		char    *s = p;
> +
> +		if (i && !memcmp(p, p - 16, 16)) {
> +			new = 0;
> +		} else {
> +			if (i)
> +				printf("*\n");
> +			new = 1;
> +		}
> +
> +		if (!new) {
> +			p += 16;
> +			continue;
> +		}
> +
> +		printf("%08llx  ", (unsigned long long)offset + i);
> +		for (j = 0; j < 16 && i + j < len; j++, p++)
> +			printf("%02x ", *p);
> +		printf(" ");
> +		for (j = 0; j < 16 && i + j < len; j++, s++) {
> +			if (isalnum((int)*s))
> +				printf("%c", *s);
> +			else
> +				printf(".");
> +		}
> +		printf("\n");
> +
> +	}
> +	printf("%08llx\n", (unsigned long long)offset + i);
> +}
> +
> +/* Parameters for once AIO write&verify testing */
> +struct io_params {
> +	void          *buf;
> +	void          *cmp_buf;
> +	unsigned long buf_size;	/* the size of AIO write */
> +	unsigned long offset;	/* AIO write offset*/
> +};
> +
> +struct io_params_node {
> +	struct iocb iocb;
> +	struct io_params *param;
> +	struct io_params_node *next;
> +};
> +
> +struct io_params_node *iop_head = NULL;
> +
> +/* Suboptions of '-a' option */
> +enum {
> +	BFS_OPT = 0,
> +	OFS_OPT
> +};
> +
> +char *const token[] = {
> +	[BFS_OPT]	= "size",	/* buffer size of once AIO write */
> +	[OFS_OPT]	= "off",	/* start offset */
> +	NULL
> +};
> +
> +/* Combine each AIO write operation things to a linked list */
> +static int add_io_param(unsigned long bs, \
> +                        unsigned long off)
> +{
> +	struct io_params_node *io = NULL;
> +	struct io_params_node **p = &iop_head;
> +
> +	io = malloc(sizeof(struct io_params_node));
> +	if (!io)
> +		goto err_out;
> +
> +	io->param = malloc(sizeof(struct io_params));
> +	if (!io->param)
> +		goto err_out;
> +
> +	io->param->buf_size = bs;
> +	io->param->offset = off;
> +
> +	io->next = NULL;
> +
> +	if (bs > 0) {
> +		if (posix_memalign(&io->param->buf, getpagesize(), bs))
> +			goto err_out;
> +		io->param->cmp_buf = malloc(bs);
> +		if (io->param->cmp_buf == NULL)
> +			goto err_out;
> +		memset(io->param->buf, IO_PATTERN, bs);
> +		memset(io->param->cmp_buf, IO_PATTERN, bs);
> +	} else {
> +		return 1;
> +	}
> +
> +	/* find the tail */
> +	while(*p != NULL) {
> +		p = &((*p)->next);
> +	}
> +	*p = io;
> +
> +	return 0;
> +
> + err_out:
> +	perror("alloc memory");
> +	return 1;
> +}
> +
> +static int parse_subopts(char *arg)
> +{
> +	char *p = arg;
> +	char *value;
> +	unsigned long bsize = 0;
> +	unsigned long off = 0;
> +
> +	while (*p != '\0') {
> +		char *endp;
> +
> +		switch(getsubopt(&p, token, &value)) {
> +		case BFS_OPT:
> +			bsize = strtoul(value, &endp, 0);
> +			break;
> +		case OFS_OPT:
> +			off = strtoul(value, &endp, 0);
> +			break;
> +		default:
> +			fprintf(stderr, "No match found for subopt %s\n", \
> +			        value);
> +			return 1;
> +		}
> +	}
> +
> +	return add_io_param(bsize, off);
> +}
> +
> +static int io_write(int fd, int num_events)
> +{
> +	int err;
> +	struct io_params_node *p = iop_head;
> +	struct iocb **iocbs;
> +	struct io_event *evs;
> +	struct io_context *ctx = NULL;
> +	int i;
> +
> +	err = io_setup(num_events, &ctx);
> +	if (err) {
> +		perror("io_setup");
> +		return 1;
> +	}
> +
> +	iocbs = (struct iocb **)malloc(num_events * sizeof(struct iocb *));
> +	if (iocbs == NULL) {
> +		perror("malloc");
> +		return 1;
> +	}
> +
> +	evs = malloc(num_events * sizeof(struct io_event));
> +	if (evs == NULL) {
> +		perror("malloc");
> +		return 1;
> +	}
> +
> +	i = 0;
> +	while (p != NULL) {
> +		iocbs[i] = &(p->iocb);
> +		io_prep_pwrite(&p->iocb, fd, p->param->buf, \
> +		               p->param->buf_size, p->param->offset);
> +		p = p->next;
> +		i++;
> +	}
> +
> +	err = io_submit(ctx, num_events, iocbs);
> +	if (err != num_events) {
> +		fprintf(stderr, "error %s during %s\n",
> +		        strerror(err),
> +		        "io_submit");
> +		return 1;
> +	}
> +
> +	err = io_getevents(ctx, num_events, num_events, evs, NULL);
> +	if (err != num_events) {
> +		fprintf(stderr, "error %s during %s\n",
> +		        strerror(err),
> +		        "io_getevents");
> +		return 1;
> +	}
> +
> +	/* Try to destroy at here, not necessary, so don't check result */
> +	io_destroy(ctx);
> +
> +	return 0;
> +}
> +
> +static int io_verify(int fd)
> +{
> +	struct io_params_node *p = iop_head;
> +	ssize_t sret;
> +	int corrupted = 0;
> +
> +	while(p != NULL) {
> +		sret = pread(fd, p->param->buf, \
> +		             p->param->buf_size, p->param->offset);
> +		if (sret == -1) {
> +			perror("pread");
> +			return 1;
> +		} else if (sret != p->param->buf_size) {
> +			fprintf(stderr, "short read %zd was less than %zu\n",
> +			        sret, p->param->buf_size);
> +			return 1;
> +		}
> +		if (memcmp(p->param->buf, \
> +		           p->param->cmp_buf, p->param->buf_size)) {
> +			printf("Find corruption\n");
> +			dump_buffer(p->param->buf, p->param->offset, \
> +			            p->param->buf_size);
> +			corrupted++;
> +		}
> +		p = p->next;
> +	}
> +
> +	return corrupted;
> +}
> +
> +int main(int argc, char *argv[])
> +{
> +	int fd;
> +	int c;
> +	char *filename = NULL;
> +	int num_events = 0;
> +	off_t tsize = 0;
> +
> +	while ((c = getopt(argc, argv, "a:t:")) != -1) {
> +		char *endp;
> +
> +		switch (c) {
> +		case 'a':
> +			if (parse_subopts(optarg) == 0) {
> +				num_events++;
> +			} else {
> +				fprintf(stderr, "Bad subopt %s\n", optarg);
> +				usage(argv[0]);
> +			}
> +			break;
> +		case 't':
> +			tsize = strtoul(optarg, &endp, 0);
> +			break;
> +		default:
> +			usage(argv[0]);
> +		}
> +	}
> +
> +	if (num_events > MAX_EVENT_NUM) {
> +		fprintf(stderr, "Too many AIO events, %d > %d\n", \
> +		        num_events, MAX_EVENT_NUM);
> +		usage(argv[0]);
> +	}
> +
> +	if (optind == argc - 1)
> +		filename = argv[optind];
> +	else
> +		usage(argv[0]);
> +
> +	fd = open(filename, O_DIRECT | O_CREAT | O_TRUNC | O_RDWR, 0600);
> +	if (fd == -1) {
> +		perror("open");
> +		return 1;
> +	}
> +
> +	if (tsize > 0) {
> +		if (ftruncate(fd, tsize)) {
> +			perror("ftruncate");
> +			return 1;
> +		}
> +	}
> +
> +	if (io_write(fd, num_events) != 0) {
> +		fprintf(stderr, "AIO write fails\n");
> +		return 1;
> +	}
> +
> +	if (io_verify(fd) != 0) {
> +		fprintf(stderr, "Data verification fails\n");
> +		return 1;
> +	}
> +
> +	close(fd);
> +	return 0;
> +}
> diff --git a/tests/generic/999 b/tests/generic/999
> new file mode 100755
> index 00000000..96924f28
> --- /dev/null
> +++ b/tests/generic/999
> @@ -0,0 +1,96 @@
> +#! /bin/bash
> +# SPDX-License-Identifier: GPL-2.0
> +# Copyright (c) 2019 Red Hat, Inc.  All Rights Reserved.
> +#
> +# FS QA Test No. 999
> +#
> +# Non-page-aligned direct AIO write test with an initial truncate i_size.
> +#
> +# Uncover "ext4: Fix data corruption caused by unaligned direct AIO":
> +# (Ext4 needs to serialize unaligned direct AIO because the zeroing of
> +# partial blocks of two competing unaligned AIOs can result in data
> +# corruption.
> +#
> +# However it decides not to serialize if the potentially unaligned aio is
> +# past i_size with the rationale that no pending writes are possible past
> +# i_size. Unfortunately if the i_size is not block aligned and the second
> +# unaligned write lands past i_size, but still into the same block, it has
> +# the potential of corrupting the previous unaligned write to the same
> +# block.)
> +#
> +seq=`basename $0`
> +seqres=$RESULT_DIR/$seq
> +echo "QA output created by $seq"
> +
> +here=`pwd`
> +tmp=/tmp/$$
> +status=1	# failure is the default!
> +trap "_cleanup; exit \$status" 0 1 2 3 15
> +
> +_cleanup()
> +{
> +	cd /
> +	rm -f $tmp.*
> +}
> +
> +# get standard environment, filters and checks
> +. ./common/rc
> +. ./common/filter
> +
> +# remove previous $seqres.full before test
> +rm -f $seqres.full
> +
> +# real QA test starts here
> +_supported_fs generic
> +_supported_os Linux
> +_require_test
> +_require_aiodio aio-dio-write-verify
> +_require_test_program "feature"
> +
> +localfile=$TEST_DIR/tst-aio-dio-testfile
> +diosize=`_min_dio_alignment $TEST_DEV`
> +pagesize=`src/feature -s`
> +bufsize=$((pagesize * 2))
> +truncsize=$((bufsize+diosize))
> +
> +# Need smaller logical block size to do non-page-aligned test
> +if [ $diosize -ge $pagesize ];then
> +	_notrun "Need device logical block size($diosize) < page size($pagesize)"
> +fi
> +
> +rm -rf $localfile 2>/dev/null
> +# page-aligned aiodio write verification at first
> +$AIO_TEST -a size=$bufsize,off=0 -a size=$bufsize,off=$bufsize $localfile
> +
> +# non-page-aligned aiodio write verification
> +#          **************** **************** ****************
> +#          *    page 1&2  * *   page 3&4   * *   page 5&6   *
> +#          **************** **************** ****************
> +# existing 0000000000000000 0000000000000000 0000000000000000
> +# truncate ---------------->|
> +# write 1   ZZZZZZZZZZZZZZZ Z
> +# write 2  |<----            ZZZZZZZZZZZZZZZ Z ---->|
> +#
> +# "Write 1" writes 2 pagesize data at off=$diosize.
> +# "Write 2" seeks from 0 to "Write 1" end + page size, shift $diosize bytes each
> +# time, writes 2 pagesize data too.
> +# Verify there's not corruption each time.
> +i=0
> +while [ $((diosize * i)) -lt $((diosize + bufsize + pagesize)) ];do
> +	position=$((diosize * i++))
> +	# non-page-aligned AIO write on different i_size file
> +	$AIO_TEST -t $truncsize -a size=$bufsize,off=$diosize \
> +		  -a size=$bufsize,off=$position \
> +		  $localfile
> +	if [ $? -ne 0 ];then
> +		echo "FAIL: [$truncsize, $bufsize, $diosize, $position]"
> +		echo "-------------------------------------------------"
> +	fi
> +	rm -f $localfile
> +done
> +
> +echo "Silence is golden"
> +
> +# success, all done
> +status=0
> +exit
> diff --git a/tests/generic/999.out b/tests/generic/999.out
> new file mode 100644
> index 00000000..3b276ca8
> --- /dev/null
> +++ b/tests/generic/999.out
> @@ -0,0 +1,2 @@
> +QA output created by 999
> +Silence is golden
> diff --git a/tests/generic/group b/tests/generic/group
> index 15227b67..b1f93d99 100644
> --- a/tests/generic/group
> +++ b/tests/generic/group
> @@ -534,3 +534,4 @@
>  529 auto quick attr
>  530 auto quick unlink
>  531 auto quick unlink
> +999 auto quick aio
> -- 
> 2.17.2
> 



[Index of Archives]     [Linux Filesystems Development]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux