On 11/12/24 10:06 AM, Jens Axboe wrote: > On 11/12/24 9:39 AM, Brian Foster wrote: >> On Tue, Nov 12, 2024 at 08:14:28AM -0700, Jens Axboe wrote: >>> On 11/11/24 10:13 PM, Christoph Hellwig wrote: >>>> On Mon, Nov 11, 2024 at 04:42:25PM -0700, Jens Axboe wrote: >>>>> Here's the slightly cleaned up version, this is the one I ran testing >>>>> with. >>>> >>>> Looks reasonable to me, but you probably get better reviews on the >>>> fstests lists. >>> >>> I'll send it out once this patchset is a bit closer to integration, >>> there's the usual chicken and egg situation with it. For now, it's quite >>> handy for my testing, found a few issues with this version. So thanks >>> for the suggestion, sure beats writing more of your own test cases :-) >>> >> >> fsx support is probably a good idea as well. It's similar in idea to >> fsstress, but bashes the same file with mixed operations and includes >> data integrity validation checks as well. It's pretty useful for >> uncovering subtle corner case issues or bad interactions.. > > Indeed, I did that too. Re-running xfstests right now with that too. Here's what I'm running right now, fwiw. It adds RWF_UNCACHED support for both the sync read/write and io_uring paths. 
diff --git a/ltp/fsx.c b/ltp/fsx.c index 41933354..104910ff 100644 --- a/ltp/fsx.c +++ b/ltp/fsx.c @@ -43,6 +43,10 @@ # define MAP_FILE 0 #endif +#ifndef RWF_UNCACHED +#define RWF_UNCACHED 0x80 +#endif + #define NUMPRINTCOLUMNS 32 /* # columns of data to print on each line */ /* Operation flags (bitmask) */ @@ -101,7 +105,9 @@ int logcount = 0; /* total ops */ enum { /* common operations */ OP_READ = 0, + OP_READ_UNCACHED, OP_WRITE, + OP_WRITE_UNCACHED, OP_MAPREAD, OP_MAPWRITE, OP_MAX_LITE, @@ -190,15 +196,16 @@ int o_direct; /* -Z */ int aio = 0; int uring = 0; int mark_nr = 0; +int rwf_uncached = 1; int page_size; int page_mask; int mmap_mask; -int fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset); +int fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset, int flags); #define READ 0 #define WRITE 1 -#define fsxread(a,b,c,d) fsx_rw(READ, a,b,c,d) -#define fsxwrite(a,b,c,d) fsx_rw(WRITE, a,b,c,d) +#define fsxread(a,b,c,d,f) fsx_rw(READ, a,b,c,d,f) +#define fsxwrite(a,b,c,d,f) fsx_rw(WRITE, a,b,c,d,f) struct timespec deadline; @@ -266,7 +273,9 @@ prterr(const char *prefix) static const char *op_names[] = { [OP_READ] = "read", + [OP_READ_UNCACHED] = "read_uncached", [OP_WRITE] = "write", + [OP_WRITE_UNCACHED] = "write_uncached", [OP_MAPREAD] = "mapread", [OP_MAPWRITE] = "mapwrite", [OP_TRUNCATE] = "truncate", @@ -393,12 +402,14 @@ logdump(void) prt("\t******WWWW"); break; case OP_READ: + case OP_READ_UNCACHED: prt("READ 0x%x thru 0x%x\t(0x%x bytes)", lp->args[0], lp->args[0] + lp->args[1] - 1, lp->args[1]); if (overlap) prt("\t***RRRR***"); break; + case OP_WRITE_UNCACHED: case OP_WRITE: prt("WRITE 0x%x thru 0x%x\t(0x%x bytes)", lp->args[0], lp->args[0] + lp->args[1] - 1, @@ -784,9 +795,8 @@ doflush(unsigned offset, unsigned size) } void -doread(unsigned offset, unsigned size) +__doread(unsigned offset, unsigned size, int flags) { - off_t ret; unsigned iret; offset -= offset % readbdy; @@ -818,23 +828,39 @@ doread(unsigned offset, unsigned 
size) (monitorend == -1 || offset <= monitorend)))))) prt("%lld read\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls, offset, offset + size - 1, size); - ret = lseek(fd, (off_t)offset, SEEK_SET); - if (ret == (off_t)-1) { - prterr("doread: lseek"); - report_failure(140); - } - iret = fsxread(fd, temp_buf, size, offset); + iret = fsxread(fd, temp_buf, size, offset, flags); if (iret != size) { - if (iret == -1) - prterr("doread: read"); - else + if (iret == -1) { + if (errno == EOPNOTSUPP && flags & RWF_UNCACHED) { + rwf_uncached = 0; + return; + } + prterr("doread: read"); + } else { prt("short read: 0x%x bytes instead of 0x%x\n", iret, size); + } report_failure(141); } check_buffers(temp_buf, offset, size); } +void +doread(unsigned offset, unsigned size) +{ + __doread(offset, size, 0); +} +void +doread_uncached(unsigned offset, unsigned size) +{ + if (rwf_uncached) { + __doread(offset, size, RWF_UNCACHED); + if (rwf_uncached) + return; + } + __doread(offset, size, 0); +} + void check_eofpage(char *s, unsigned offset, char *p, int size) { @@ -870,7 +896,6 @@ check_contents(void) unsigned map_offset; unsigned map_size; char *p; - off_t ret; unsigned iret; if (!check_buf) { @@ -885,13 +910,7 @@ check_contents(void) if (size == 0) return; - ret = lseek(fd, (off_t)offset, SEEK_SET); - if (ret == (off_t)-1) { - prterr("doread: lseek"); - report_failure(140); - } - - iret = fsxread(fd, check_buf, size, offset); + iret = fsxread(fd, check_buf, size, offset, 0); if (iret != size) { if (iret == -1) prterr("check_contents: read"); @@ -1064,9 +1083,8 @@ update_file_size(unsigned offset, unsigned size) } void -dowrite(unsigned offset, unsigned size) +__dowrite(unsigned offset, unsigned size, int flags) { - off_t ret; unsigned iret; offset -= offset % writebdy; @@ -1101,18 +1119,18 @@ dowrite(unsigned offset, unsigned size) (monitorend == -1 || offset <= monitorend)))))) prt("%lld write\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls, offset, offset + size - 1, size); - ret = 
lseek(fd, (off_t)offset, SEEK_SET); - if (ret == (off_t)-1) { - prterr("dowrite: lseek"); - report_failure(150); - } - iret = fsxwrite(fd, good_buf + offset, size, offset); + iret = fsxwrite(fd, good_buf + offset, size, offset, flags); if (iret != size) { - if (iret == -1) + if (iret == -1) { + if (errno == EOPNOTSUPP && flags & RWF_UNCACHED) { + rwf_uncached = 0; + return; + } prterr("dowrite: write"); - else + } else { prt("short write: 0x%x bytes instead of 0x%x\n", iret, size); + } report_failure(151); } if (do_fsync) { @@ -1126,6 +1144,22 @@ dowrite(unsigned offset, unsigned size) } } +void +dowrite(unsigned offset, unsigned size) +{ + __dowrite(offset, size, 0); +} + +void +dowrite_uncached(unsigned offset, unsigned size) +{ + if (rwf_uncached) { + __dowrite(offset, size, RWF_UNCACHED); + if (rwf_uncached) + return; + } + __dowrite(offset, size, 0); +} void domapwrite(unsigned offset, unsigned size) @@ -2340,11 +2374,21 @@ have_op: doread(offset, size); break; + case OP_READ_UNCACHED: + TRIM_OFF_LEN(offset, size, file_size); + doread_uncached(offset, size); + break; + case OP_WRITE: TRIM_OFF_LEN(offset, size, maxfilelen); dowrite(offset, size); break; + case OP_WRITE_UNCACHED: + TRIM_OFF_LEN(offset, size, maxfilelen); + dowrite_uncached(offset, size); + break; + case OP_MAPREAD: TRIM_OFF_LEN(offset, size, file_size); domapread(offset, size); @@ -2702,7 +2746,7 @@ uring_setup() } int -uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset) +uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset, int flags) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; @@ -2733,6 +2777,7 @@ uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset) } else { io_uring_prep_writev(sqe, fd, &iovec, 1, o); } + sqe->rw_flags = flags; ret = io_uring_submit_and_wait(&ring, 1); if (ret != 1) { @@ -2781,7 +2826,7 @@ uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset) } #else int -uring_rw(int rw, int fd, char *buf, unsigned len, 
unsigned offset) +uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset, int flags) { fprintf(stderr, "io_rw: need IO_URING support!\n"); exit(111); @@ -2789,19 +2834,21 @@ uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset) #endif int -fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset) +fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset, int flags) { int ret; if (aio) { ret = aio_rw(rw, fd, buf, len, offset); } else if (uring) { - ret = uring_rw(rw, fd, buf, len, offset); + ret = uring_rw(rw, fd, buf, len, offset, flags); } else { + struct iovec iov = { .iov_base = buf, .iov_len = len }; + if (rw == READ) - ret = read(fd, buf, len); + ret = preadv2(fd, &iov, 1, offset, flags); else - ret = write(fd, buf, len); + ret = pwritev2(fd, &iov, 1, offset, flags); } return ret; } -- Jens Axboe