Re: [PATCH 2/2] fsx: add support for RWF_DONTCACHE

[Date Prev] [Date Next] [Thread Prev] [Thread Next] [Date Index] [Thread Index]



On Tue, Jan 07, 2025 at 09:05:15AM -0700, Jens Axboe wrote:
> Using RWF_DONTCACHE tells the kernel that any page cache instantiated
> by this operation should get pruned once the operation completes. If
> data is in cache prior to the operation it will remain there.
> 
> Add ops for testing both the read and write side of this. At startup,
> kernel support for this feature is probed. If support isn't available,
> uncached/dontcache IO is performed as regular buffered IO. If -Z is
> used to turn on O_DIRECT, then uncached/dontcache IO isn't performed.

Huh.  Does the kernel reject RWF_DONTCACHE for directio?  And, if a
directio implementation falls back to the pagecache (e.g. xfs when doing
a sub-fsblock cow write):

(a) do we want RWF_DONTCACHE to propagate through to the buffered io
implementation (which I think xfs does), and

(b) should filesystems *turn it on* any time they fall back, even if the
original IO request didn't set DONTCACHE?

(Aside from those questions, the code changes look good.)

--D

> Defaults to on if available, and adds a -T parameter to turn it off.
> 
> See the kernel posting adding support:
> 
> https://lore.kernel.org/linux-fsdevel/20241220154831.1086649-1-axboe@xxxxxxxxx/
> 
> Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>
> ---
>  ltp/fsx.c | 114 ++++++++++++++++++++++++++++++++++++------------------
>  1 file changed, 76 insertions(+), 38 deletions(-)
> 
> diff --git a/ltp/fsx.c b/ltp/fsx.c
> index 41933354328a..9efd2f5c86d1 100644
> --- a/ltp/fsx.c
> +++ b/ltp/fsx.c
> @@ -43,6 +43,10 @@
>  # define MAP_FILE 0
>  #endif
>  
> +#ifndef RWF_DONTCACHE
> +#define RWF_DONTCACHE	0x80
> +#endif
> +
>  #define NUMPRINTCOLUMNS 32	/* # columns of data to print on each line */
>  
>  /* Operation flags (bitmask) */
> @@ -101,7 +105,9 @@ int			logcount = 0;	/* total ops */
>  enum {
>  	/* common operations */
>  	OP_READ = 0,
> +	OP_READ_DONTCACHE,
>  	OP_WRITE,
> +	OP_WRITE_DONTCACHE,
>  	OP_MAPREAD,
>  	OP_MAPWRITE,
>  	OP_MAX_LITE,
> @@ -190,15 +196,16 @@ int	o_direct;			/* -Z */
>  int	aio = 0;
>  int	uring = 0;
>  int	mark_nr = 0;
> +int	dontcache_io = 1;
>  
>  int page_size;
>  int page_mask;
>  int mmap_mask;
> -int fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset);
> +int fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset, int flags);
>  #define READ 0
>  #define WRITE 1
> -#define fsxread(a,b,c,d)	fsx_rw(READ, a,b,c,d)
> -#define fsxwrite(a,b,c,d)	fsx_rw(WRITE, a,b,c,d)
> +#define fsxread(a,b,c,d,f)	fsx_rw(READ, a,b,c,d,f)
> +#define fsxwrite(a,b,c,d,f)	fsx_rw(WRITE, a,b,c,d,f)
>  
>  struct timespec deadline;
>  
> @@ -266,7 +273,9 @@ prterr(const char *prefix)
>  
>  static const char *op_names[] = {
>  	[OP_READ] = "read",
> +	[OP_READ_DONTCACHE] = "read_dontcache",
>  	[OP_WRITE] = "write",
> +	[OP_WRITE_DONTCACHE] = "write_dontcache",
>  	[OP_MAPREAD] = "mapread",
>  	[OP_MAPWRITE] = "mapwrite",
>  	[OP_TRUNCATE] = "truncate",
> @@ -393,12 +402,14 @@ logdump(void)
>  				prt("\t******WWWW");
>  			break;
>  		case OP_READ:
> +		case OP_READ_DONTCACHE:
>  			prt("READ     0x%x thru 0x%x\t(0x%x bytes)",
>  			    lp->args[0], lp->args[0] + lp->args[1] - 1,
>  			    lp->args[1]);
>  			if (overlap)
>  				prt("\t***RRRR***");
>  			break;
> +		case OP_WRITE_DONTCACHE:
>  		case OP_WRITE:
>  			prt("WRITE    0x%x thru 0x%x\t(0x%x bytes)",
>  			    lp->args[0], lp->args[0] + lp->args[1] - 1,
> @@ -784,9 +795,8 @@ doflush(unsigned offset, unsigned size)
>  }
>  
>  void
> -doread(unsigned offset, unsigned size)
> +doread(unsigned offset, unsigned size, int flags)
>  {
> -	off_t ret;
>  	unsigned iret;
>  
>  	offset -= offset % readbdy;
> @@ -818,12 +828,7 @@ doread(unsigned offset, unsigned size)
>  			(monitorend == -1 || offset <= monitorend))))))
>  		prt("%lld read\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
>  		    offset, offset + size - 1, size);
> -	ret = lseek(fd, (off_t)offset, SEEK_SET);
> -	if (ret == (off_t)-1) {
> -		prterr("doread: lseek");
> -		report_failure(140);
> -	}
> -	iret = fsxread(fd, temp_buf, size, offset);
> +	iret = fsxread(fd, temp_buf, size, offset, flags);
>  	if (iret != size) {
>  		if (iret == -1)
>  			prterr("doread: read");
> @@ -870,7 +875,6 @@ check_contents(void)
>  	unsigned map_offset;
>  	unsigned map_size;
>  	char *p;
> -	off_t ret;
>  	unsigned iret;
>  
>  	if (!check_buf) {
> @@ -885,13 +889,7 @@ check_contents(void)
>  	if (size == 0)
>  		return;
>  
> -	ret = lseek(fd, (off_t)offset, SEEK_SET);
> -	if (ret == (off_t)-1) {
> -		prterr("doread: lseek");
> -		report_failure(140);
> -	}
> -
> -	iret = fsxread(fd, check_buf, size, offset);
> +	iret = fsxread(fd, check_buf, size, offset, 0);
>  	if (iret != size) {
>  		if (iret == -1)
>  			prterr("check_contents: read");
> @@ -1064,9 +1062,8 @@ update_file_size(unsigned offset, unsigned size)
>  }
>  
>  void
> -dowrite(unsigned offset, unsigned size)
> +dowrite(unsigned offset, unsigned size, int flags)
>  {
> -	off_t ret;
>  	unsigned iret;
>  
>  	offset -= offset % writebdy;
> @@ -1099,14 +1096,9 @@ dowrite(unsigned offset, unsigned size)
>  		       (monitorstart == -1 ||
>  			(offset + size > monitorstart &&
>  			(monitorend == -1 || offset <= monitorend))))))
> -		prt("%lld write\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
> -		    offset, offset + size - 1, size);
> -	ret = lseek(fd, (off_t)offset, SEEK_SET);
> -	if (ret == (off_t)-1) {
> -		prterr("dowrite: lseek");
> -		report_failure(150);
> -	}
> -	iret = fsxwrite(fd, good_buf + offset, size, offset);
> +		prt("%lld write\t0x%x thru\t0x%x\t(0x%x bytes)\tdontcache=%d\n", testcalls,
> +		    offset, offset + size - 1, size, (flags & RWF_DONTCACHE) != 0);
> +	iret = fsxwrite(fd, good_buf + offset, size, offset, flags);
>  	if (iret != size) {
>  		if (iret == -1)
>  			prterr("dowrite: write");
> @@ -1954,6 +1946,26 @@ do_preallocate(unsigned offset, unsigned length, int keep_size, int unshare)
>  }
>  #endif
>  
> +int
> +test_dontcache_io(void)
> +{
> +	char buf[4096];
> +	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
> +	int ret, e;
> +
> +	ret = preadv2(fd, &iov, 1, 0, RWF_DONTCACHE);
> +	e = ret < 0 ? errno : 0;
> +	if (e == EOPNOTSUPP) {
> +		if (!quiet)
> +			fprintf(stderr,
> +				"main: filesystem does not support "
> +				"dontcache IO, disabling!\n");
> +		return 0;
> +	}
> +
> +	return 1;
> +}
> +
>  void
>  writefileimage()
>  {
> @@ -2337,12 +2349,28 @@ have_op:
>  	switch (op) {
>  	case OP_READ:
>  		TRIM_OFF_LEN(offset, size, file_size);
> -		doread(offset, size);
> +		doread(offset, size, 0);
> +		break;
> +
> +	case OP_READ_DONTCACHE:
> +		TRIM_OFF_LEN(offset, size, file_size);
> +		if (dontcache_io)
> +			doread(offset, size, RWF_DONTCACHE);
> +		else
> +			doread(offset, size, 0);
>  		break;
>  
>  	case OP_WRITE:
>  		TRIM_OFF_LEN(offset, size, maxfilelen);
> -		dowrite(offset, size);
> +		dowrite(offset, size, 0);
> +		break;
> +
> +	case OP_WRITE_DONTCACHE:
> +		TRIM_OFF_LEN(offset, size, maxfilelen);
> +		if (dontcache_io)
> +			dowrite(offset, size, RWF_DONTCACHE);
> +		else
> +			dowrite(offset, size, 0);
>  		break;
>  
>  	case OP_MAPREAD:
> @@ -2538,6 +2566,7 @@ usage(void)
>  "	-0: Do not use exchange range calls\n"
>  #endif
>  "	-K: Do not use keep size\n\
> +	-T: Do not use dontcache IO\n\
>  	-L: fsxLite - no file creations & no file size changes\n\
>  	-N numops: total # operations to do (default infinity)\n\
>  	-O: use oplen (see -o flag) for every op (default random)\n\
> @@ -2546,7 +2575,7 @@ usage(void)
>  	-S seed: for random # generator (default 1) 0 gets timestamp\n\
>  	-W: mapped write operations DISabled\n\
>  	-X: Read file and compare to good buffer after every operation\n\
> -	-Z: O_DIRECT (use -R, -W, -r and -w too)\n\
> +	-Z: O_DIRECT (use -R, -W, -r and -w too, excludes dontcache IO)\n\
>  	--replay-ops=opsfile: replay ops from recorded .fsxops file\n\
>  	--record-ops[=opsfile]: dump ops file also on success. optionally specify ops file name\n\
>  	--duration=seconds: ignore any -N setting and run for this many seconds\n\
> @@ -2702,7 +2731,7 @@ uring_setup()
>  }
>  
>  int
> -uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
> +uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset, int flags)
>  {
>  	struct io_uring_sqe     *sqe;
>  	struct io_uring_cqe     *cqe;
> @@ -2733,6 +2762,7 @@ uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
>  		} else {
>  			io_uring_prep_writev(sqe, fd, &iovec, 1, o);
>  		}
> +		sqe->rw_flags = flags;
>  
>  		ret = io_uring_submit_and_wait(&ring, 1);
>  		if (ret != 1) {
> @@ -2781,7 +2811,7 @@ uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
>  }
>  #else
>  int
> -uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
> +uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset, int flags)
>  {
>  	fprintf(stderr, "io_rw: need IO_URING support!\n");
>  	exit(111);
> @@ -2789,19 +2819,21 @@ uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
>  #endif
>  
>  int
> -fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
> +fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset, int flags)
>  {
>  	int ret;
>  
>  	if (aio) {
>  		ret = aio_rw(rw, fd, buf, len, offset);
>  	} else if (uring) {
> -		ret = uring_rw(rw, fd, buf, len, offset);
> +		ret = uring_rw(rw, fd, buf, len, offset, flags);
>  	} else {
> +		struct iovec iov = { .iov_base = buf, .iov_len = len };
> +
>  		if (rw == READ)
> -			ret = read(fd, buf, len);
> +			ret = preadv2(fd, &iov, 1, offset, flags);
>  		else
> -			ret = write(fd, buf, len);
> +			ret = pwritev2(fd, &iov, 1, offset, flags);
>  	}
>  	return ret;
>  }
> @@ -3065,6 +3097,9 @@ main(int argc, char **argv)
>  			if (seed < 0)
>  				usage();
>  			break;
> +		case 'T':
> +			dontcache_io = 0;
> +			break;
>  		case 'W':
>  		        mapped_writes = 0;
>  			if (!quiet)
> @@ -3076,6 +3111,7 @@ main(int argc, char **argv)
>  		case 'Z':
>  			o_direct = O_DIRECT;
>  			o_flags |= O_DIRECT;
> +			dontcache_io = 0;
>  			break;
>  		case 254:  /* --duration */
>  			if (!optarg) {
> @@ -3293,6 +3329,8 @@ main(int argc, char **argv)
>  		copy_range_calls = test_copy_range();
>  	if (exchange_range_calls)
>  		exchange_range_calls = test_exchange_range();
> +	if (dontcache_io)
> +		dontcache_io = test_dontcache_io();
>  
>  	while (keep_running())
>  		if (!test())
> -- 
> 2.47.1
> 
> 




[Index of Archives]     [Linux Filesystems Development]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux