Am 06.05.20 um 14:41 schrieb Anoop C S: > On Wed, 2020-05-06 at 12:33 +0200, Stefan Metzmacher wrote: >> Hi Anoop, >> >>> I could reproduce the difference in SHA256 checksum after copying a >>> directory with 100 copies of test file(provided by reporter) from >>> io_uring VFS module enabled share using Windows explorer(right- >>> click- >>>> copy/paste). Only 5 out of 100 files had correct checksum after >>>> copy >>> operation :-/ >> >> Great! Can you please try to collect level 1 log files with >> the patch https://bugzilla.samba.org/attachment.cgi?id=15955 >> applied? > > I have attached three log files. > log.io_uring.smbd -- Copy using Windows explorer > log.io_uring-mget.smd -- Copy using smbclient > log.io_uring-powershell.smd -- Copy using `Copy-Item` Thanks! All of them show short reads like: > [2020/05/06 17:27:28.130248, 1] ../../source3/modules/vfs_io_uring.c:103(vfs_io_uring_finish_req) > vfs_io_uring_finish_req: pread ofs=0 (0x0) len=32768 (0x8000) nread=32768 (0x32768) eof=10000000 (0x989680) blks=4096 blocks=19536 dir/1.bin fnum 1607026405 > [2020/05/06 17:27:28.131049, 1] ../../source3/modules/vfs_io_uring.c:103(vfs_io_uring_finish_req) > vfs_io_uring_finish_req: pread ofs=9969664 (0x982000) len=30336 (0x7680) nread=30336 (0x30336) eof=10000000 (0x989680) blks=4096 blocks=19536 dir/1.bin fnum 1607026405 > [2020/05/06 17:27:28.133679, 1] ../../source3/modules/vfs_io_uring.c:103(vfs_io_uring_finish_req) > vfs_io_uring_finish_req: pread ofs=61440 (0xf000) len=32768 (0x8000) nread=32768 (0x32768) eof=10000000 (0x989680) blks=4096 blocks=19536 dir/1.bin fnum 1607026405 > [2020/05/06 17:27:28.140184, 0] ../../source3/modules/vfs_io_uring.c:88(vfs_io_uring_finish_req) > vfs_io_uring_finish_req: Invalid pread ofs=0 (0x0) len=1048576 (0x100000) nread=32768 (0x32768) eof=10000000 (0x989680) blks=4096 blocks=19536 dir/1.bin fnum 1607026405 It seems the first request is at ofs=0 len=32768 (0x8000) and we get 32768 back. A follow up request with ofs=0 len=1048576 (0x100000) only gets the first 32768 bytes which are already in the page cache. I can easily reproduce this with the Ubuntu 5.4 kernel once I add state->ur.sqe.rw_flags |= RWF_NOWAIT; to vfs_io_uring_pread_send() and use this. echo 1 > /proc/sys/vm/drop_caches head -c 1024 /root/samba-test/ff1.dat | hexdump -C 00000000 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................| * 00000400 smbclient //172.31.9.167/uringff -Uroot%test -c "get ff1.dat" results in this log entries: > [2020/05/06 06:51:57.069990, 0] ../../source3/modules/vfs_io_uring.c:89(vfs_io_uring_finish_req) > vfs_io_uring_finish_req: Invalid pread ofs=0 (0x0) len=8388608 (0x800000) nread=16384 (0x4000) eof=8388608 (0x800000) blks=4096 blocks=16384 ff1.dat fnum 840153065 > [2020/05/06 06:51:57.076882, 1] ../../source3/modules/vfs_io_uring.c:104(vfs_io_uring_finish_req) > vfs_io_uring_finish_req: pread ofs=16384 (0x4000) len=8372224 (0x7fc000) nread=8372224 (0x7fc000) eof=8388608 (0x800000) blks=4096 blocks=16384 ff1.dat fnum 840153065 smbclient is just smart enough to recover itself from the short read. But the windows client isn't. The attached test against liburing (git://git.kernel.dk/liburing) should be able to demonstrate the problem. It can also be found in https://github.com/metze-samba/liburing/tree/implicit-rwf-nowaithttps://github.com/metze-samba/liburing/commit/eb06dcfde747e46bd08bedf9def2e6cb536c39e3 I added the sqe->rw_flags = RWF_NOWAIT; line in order to demonstrate it against the Ubuntu 5.3 and 5.4 kernels. They both seem to have the bug. Can someone run the unmodified test/implicit-rwf_nowait against a newer kernel? Thanks! metze
From eb06dcfde747e46bd08bedf9def2e6cb536c39e3 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher <metze@xxxxxxxxx> Date: Wed, 6 May 2020 15:33:53 +0200 Subject: [PATCH] TODO!!! test/implicit-rwf_nowait.c Once this proves the problem, we need a more detailed commit message... --- test/Makefile | 4 +- test/implicit-rwf_nowait.c | 162 +++++++++++++++++++++++++++++++++++++ 2 files changed, 164 insertions(+), 2 deletions(-) create mode 100644 test/implicit-rwf_nowait.c diff --git a/test/Makefile b/test/Makefile index ff4d4b8ec04a..50a75588aca7 100644 --- a/test/Makefile +++ b/test/Makefile @@ -21,7 +21,7 @@ all_targets += poll poll-cancel ring-leak fsync io_uring_setup io_uring_register file-update accept-reuse poll-v-poll fadvise madvise \ short-read openat2 probe shared-wq personality eventfd \ send_recv eventfd-ring across-fork sq-poll-kthread splice \ - lfs-openat lfs-openat-write + lfs-openat lfs-openat-write implicit-rwf_nowait include ../Makefile.quiet @@ -53,7 +53,7 @@ test_srcs := poll.c poll-cancel.c ring-leak.c fsync.c io_uring_setup.c \ file-update.c accept-reuse.c poll-v-poll.c fadvise.c \ madvise.c short-read.c openat2.c probe.c shared-wq.c \ personality.c eventfd.c eventfd-ring.c across-fork.c sq-poll-kthread.c \ - splice.c lfs-openat.c lfs-openat-write.c + splice.c lfs-openat.c lfs-openat-write.c implicit-rwf-nowait.c ifdef CONFIG_HAVE_STATX test_srcs += statx.c diff --git a/test/implicit-rwf_nowait.c b/test/implicit-rwf_nowait.c new file mode 100644 index 000000000000..581df209d295 --- /dev/null +++ b/test/implicit-rwf_nowait.c @@ -0,0 +1,162 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: Implicit RWF_NOWAIT bug + * + */ +#include <errno.h> +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <fcntl.h> + +#include "liburing.h" + +#define BLOCK 4096 + +#ifndef RWF_NOWAIT +#define RWF_NOWAIT 8 +#endif + +static int get_file_fd(void) +{ + ssize_t ret; + char *buf; + int fd; + + fd = open("testfile", O_RDWR | O_CREAT, 0644); + if (fd < 0) { + perror("open file"); + return -1; + } + + buf = malloc(BLOCK); + memset(buf, 0xff, BLOCK); + ret = pwrite(fd, buf, BLOCK, 0); + if (ret != BLOCK) { + if (ret < 0) + perror("write"); + else + printf("Short write\n"); + goto err; + } + ret = pwrite(fd, buf, BLOCK, BLOCK); + if (ret != BLOCK) { + if (ret < 0) + perror("write"); + else + printf("Short write\n"); + goto err; + } + fsync(fd); + + if (posix_fadvise(fd, BLOCK, BLOCK, POSIX_FADV_DONTNEED)) { + perror("fadvise"); +err: + close(fd); + free(buf); + return -1; + } + + free(buf); + return fd; +} + +static void put_file_fd(int fd) +{ + close(fd); + unlink("testfile"); +} + +int main(int argc, char *argv[]) +{ + struct io_uring ring; + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + struct iovec iov; + int ret, fd; + + iov.iov_base = malloc(2*BLOCK); + iov.iov_len = BLOCK; + + ret = io_uring_queue_init(1, &ring, 0); + if (ret) { + printf("ring setup failed\n"); + return 1; + + } + + sqe = io_uring_get_sqe(&ring); + if (!sqe) { + printf("get sqe failed\n"); + return 1; + } + + fd = get_file_fd(); + if (fd < 0) + return 1; + + io_uring_prep_readv(sqe, fd, &iov, 1, 0); + io_uring_sqe_set_data(sqe, (void *)(uintptr_t)0x11111111); + ret = io_uring_submit(&ring); + if (ret != 1) { + printf("Got submit %d, expected 1\n", ret); + goto err; + } + + ret = io_uring_wait_cqe(&ring, &cqe); + if (ret) { + printf("Ring wait got %d\n", ret); + goto err; + } + ret = (uintptr_t)io_uring_cqe_get_data(cqe); + if (ret != 0x11111111) { + printf("Got invalid data 0x%08x, expected 0x11111111\n", ret); + goto err; + } + io_uring_cq_advance(&ring, 1); + + if (cqe->res != BLOCK) { + printf("cqe res=%d != %u\n", cqe->res, BLOCK); + goto err; + } + + sqe = io_uring_get_sqe(&ring); + if (!sqe) { + printf("get sqe failed\n"); + return 1; + } + + iov.iov_len = 2*BLOCK; + io_uring_prep_readv(sqe, fd, &iov, 1, 0); + // Impliet by broken kernels? sqe->rw_flags = RWF_NOWAIT; + io_uring_sqe_set_data(sqe, (void *)(uintptr_t)0x22222222); + ret = io_uring_submit(&ring); + if (ret != 1) { + printf("Got submit %d, expected 1\n", ret); + goto err; + } + + ret = io_uring_wait_cqe(&ring, &cqe); + if (ret) { + printf("Ring peek got %d\n", ret); + goto err; + } + ret = (uintptr_t)io_uring_cqe_get_data(cqe); + if (ret != 0x22222222) { + printf("Got invalid data 0x%08x, expected 0x22222222\n", ret); + goto err; + } + io_uring_cq_advance(&ring, 1); + + if (cqe->res != 2*BLOCK) { + printf("cqe res=%d != %u\n", cqe->res, 2*BLOCK); + goto err; + } + + put_file_fd(fd); + return 0; +err: + put_file_fd(fd); + return 1; +} -- 2.17.1
Attachment:
signature.asc
Description: OpenPGP digital signature