Re: [PATCH 08/15] mm/filemap: add read support for RWF_UNCACHED

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Nov 12, 2024 at 10:19:02AM -0700, Jens Axboe wrote:
> On 11/12/24 10:06 AM, Jens Axboe wrote:
> > On 11/12/24 9:39 AM, Brian Foster wrote:
> >> On Tue, Nov 12, 2024 at 08:14:28AM -0700, Jens Axboe wrote:
> >>> On 11/11/24 10:13 PM, Christoph Hellwig wrote:
> >>>> On Mon, Nov 11, 2024 at 04:42:25PM -0700, Jens Axboe wrote:
> >>>>> Here's the slightly cleaned up version, this is the one I ran testing
> >>>>> with.
> >>>>
> >>>> Looks reasonable to me, but you probably get better reviews on the
> >>>> fstests lists.
> >>>
> >>> I'll send it out once this patchset is a bit closer to integration,
> >>> there's the usual chicken and egg situation with it. For now, it's quite
> >>> handy for my testing, found a few issues with this version. So thanks
> >>> for the suggestion, sure beats writing more of your own test cases :-)
> >>>
> >>
> >> fsx support is probably a good idea as well. It's similar in idea to
> >> fsstress, but bashes the same file with mixed operations and includes
> >> data integrity validation checks as well. It's pretty useful for
> >> uncovering subtle corner case issues or bad interactions.
> > 
> > Indeed, I did that too. Re-running xfstests right now with that too.
> 
> Here's what I'm running right now, fwiw. It adds RWF_UNCACHED support
> for both the sync read/write and io_uring paths.
> 

Nice, thanks. Looks reasonable to me at first glance. A few randomish
comments inlined below.

BTW, I should have mentioned that fsx is also useful for longer soak
testing. I.e., fstests will provide a decent amount of coverage as is
via the various preexisting tests, but I'll occasionally run fsx
directly and let it run overnight or so to get the op count up to at
least 100 million or so, to have a little more confidence there isn't
some rare/subtle bug lurking. That might be helpful with something like
this. JFYI.

> 
> diff --git a/ltp/fsx.c b/ltp/fsx.c
> index 41933354..104910ff 100644
> --- a/ltp/fsx.c
> +++ b/ltp/fsx.c
> @@ -43,6 +43,10 @@
>  # define MAP_FILE 0
>  #endif
>  
> +#ifndef RWF_UNCACHED
> +#define RWF_UNCACHED	0x80
> +#endif
> +
>  #define NUMPRINTCOLUMNS 32	/* # columns of data to print on each line */
>  
>  /* Operation flags (bitmask) */
> @@ -101,7 +105,9 @@ int			logcount = 0;	/* total ops */
>  enum {
>  	/* common operations */
>  	OP_READ = 0,
> +	OP_READ_UNCACHED,
>  	OP_WRITE,
> +	OP_WRITE_UNCACHED,
>  	OP_MAPREAD,
>  	OP_MAPWRITE,
>  	OP_MAX_LITE,
> @@ -190,15 +196,16 @@ int	o_direct;			/* -Z */
>  int	aio = 0;
>  int	uring = 0;
>  int	mark_nr = 0;
> +int	rwf_uncached = 1;
>  
>  int page_size;
>  int page_mask;
>  int mmap_mask;
> -int fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset);
> +int fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset, int flags);
>  #define READ 0
>  #define WRITE 1
> -#define fsxread(a,b,c,d)	fsx_rw(READ, a,b,c,d)
> -#define fsxwrite(a,b,c,d)	fsx_rw(WRITE, a,b,c,d)
> +#define fsxread(a,b,c,d,f)	fsx_rw(READ, a,b,c,d,f)
> +#define fsxwrite(a,b,c,d,f)	fsx_rw(WRITE, a,b,c,d,f)
>  

My pattern recognition brain wants to see an 'e' here. ;)

>  struct timespec deadline;
>  
> @@ -266,7 +273,9 @@ prterr(const char *prefix)
>  
>  static const char *op_names[] = {
>  	[OP_READ] = "read",
> +	[OP_READ_UNCACHED] = "read_uncached",
>  	[OP_WRITE] = "write",
> +	[OP_WRITE_UNCACHED] = "write_uncached",
>  	[OP_MAPREAD] = "mapread",
>  	[OP_MAPWRITE] = "mapwrite",
>  	[OP_TRUNCATE] = "truncate",
> @@ -393,12 +402,14 @@ logdump(void)
>  				prt("\t******WWWW");
>  			break;
>  		case OP_READ:
> +		case OP_READ_UNCACHED:
>  			prt("READ     0x%x thru 0x%x\t(0x%x bytes)",
>  			    lp->args[0], lp->args[0] + lp->args[1] - 1,
>  			    lp->args[1]);
>  			if (overlap)
>  				prt("\t***RRRR***");
>  			break;
> +		case OP_WRITE_UNCACHED:
>  		case OP_WRITE:
>  			prt("WRITE    0x%x thru 0x%x\t(0x%x bytes)",
>  			    lp->args[0], lp->args[0] + lp->args[1] - 1,
> @@ -784,9 +795,8 @@ doflush(unsigned offset, unsigned size)
>  }
>  
>  void
> -doread(unsigned offset, unsigned size)
> +__doread(unsigned offset, unsigned size, int flags)
>  {
> -	off_t ret;
>  	unsigned iret;
>  
>  	offset -= offset % readbdy;
> @@ -818,23 +828,39 @@ doread(unsigned offset, unsigned size)
>  			(monitorend == -1 || offset <= monitorend))))))
>  		prt("%lld read\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
>  		    offset, offset + size - 1, size);
> -	ret = lseek(fd, (off_t)offset, SEEK_SET);
> -	if (ret == (off_t)-1) {
> -		prterr("doread: lseek");
> -		report_failure(140);
> -	}
> -	iret = fsxread(fd, temp_buf, size, offset);
> +	iret = fsxread(fd, temp_buf, size, offset, flags);
>  	if (iret != size) {
> -		if (iret == -1)
> -			prterr("doread: read");
> -		else
> +		if (iret == -1) {
> +			if (errno == EOPNOTSUPP && flags & RWF_UNCACHED) {
> +				rwf_uncached = 1;

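I assume you meant rwf_uncached = 0 here? As written the flag is never
cleared on EOPNOTSUPP, so doread_uncached() returns without falling
back to a regular read.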

If so, check out test_fallocate() and friends to see how various
operations are tested for support before the test starts. Following that
might clean things up a bit.

Also it's useful to have a CLI option to enable/disable individual
features. That tends to be helpful when it does happen to explode and
you want to narrow down the cause.

Brian

> +				return;
> +			}
> +			prterr("dowrite: read");
> +		} else {
>  			prt("short read: 0x%x bytes instead of 0x%x\n",
>  			    iret, size);
> +		}
>  		report_failure(141);
>  	}
>  	check_buffers(temp_buf, offset, size);
>  }
> +void
> +doread(unsigned offset, unsigned size)
> +{
> +	__doread(offset, size, 0);
> +}
>  
> +void
> +doread_uncached(unsigned offset, unsigned size)
> +{
> +	if (rwf_uncached) {
> +		__doread(offset, size, RWF_UNCACHED);
> +		if (rwf_uncached)
> +			return;
> +	}
> +	__doread(offset, size, 0);
> +}
> +	
>  void
>  check_eofpage(char *s, unsigned offset, char *p, int size)
>  {
> @@ -870,7 +896,6 @@ check_contents(void)
>  	unsigned map_offset;
>  	unsigned map_size;
>  	char *p;
> -	off_t ret;
>  	unsigned iret;
>  
>  	if (!check_buf) {
> @@ -885,13 +910,7 @@ check_contents(void)
>  	if (size == 0)
>  		return;
>  
> -	ret = lseek(fd, (off_t)offset, SEEK_SET);
> -	if (ret == (off_t)-1) {
> -		prterr("doread: lseek");
> -		report_failure(140);
> -	}
> -
> -	iret = fsxread(fd, check_buf, size, offset);
> +	iret = fsxread(fd, check_buf, size, offset, 0);
>  	if (iret != size) {
>  		if (iret == -1)
>  			prterr("check_contents: read");
> @@ -1064,9 +1083,8 @@ update_file_size(unsigned offset, unsigned size)
>  }
>  
>  void
> -dowrite(unsigned offset, unsigned size)
> +__dowrite(unsigned offset, unsigned size, int flags)
>  {
> -	off_t ret;
>  	unsigned iret;
>  
>  	offset -= offset % writebdy;
> @@ -1101,18 +1119,18 @@ dowrite(unsigned offset, unsigned size)
>  			(monitorend == -1 || offset <= monitorend))))))
>  		prt("%lld write\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
>  		    offset, offset + size - 1, size);
> -	ret = lseek(fd, (off_t)offset, SEEK_SET);
> -	if (ret == (off_t)-1) {
> -		prterr("dowrite: lseek");
> -		report_failure(150);
> -	}
> -	iret = fsxwrite(fd, good_buf + offset, size, offset);
> +	iret = fsxwrite(fd, good_buf + offset, size, offset, flags);
>  	if (iret != size) {
> -		if (iret == -1)
> +		if (iret == -1) {
> +			if (errno == EOPNOTSUPP && flags & RWF_UNCACHED) {
> +				rwf_uncached = 0;
> +				return;
> +			}
>  			prterr("dowrite: write");
> -		else
> +		} else {
>  			prt("short write: 0x%x bytes instead of 0x%x\n",
>  			    iret, size);
> +		}
>  		report_failure(151);
>  	}
>  	if (do_fsync) {
> @@ -1126,6 +1144,22 @@ dowrite(unsigned offset, unsigned size)
>  	}
>  }
>  
> +void
> +dowrite(unsigned offset, unsigned size)
> +{
> +	__dowrite(offset, size, 0);
> +}
> +
> +void
> +dowrite_uncached(unsigned offset, unsigned size)
> +{
> +	if (rwf_uncached) {
> +		__dowrite(offset, size, RWF_UNCACHED);
> +		if (rwf_uncached)
> +			return;
> +	}
> +	__dowrite(offset, size, 0);
> +}
>  
>  void
>  domapwrite(unsigned offset, unsigned size)
> @@ -2340,11 +2374,21 @@ have_op:
>  		doread(offset, size);
>  		break;
>  
> +	case OP_READ_UNCACHED:
> +		TRIM_OFF_LEN(offset, size, file_size);
> +		doread_uncached(offset, size);
> +		break;
> +
>  	case OP_WRITE:
>  		TRIM_OFF_LEN(offset, size, maxfilelen);
>  		dowrite(offset, size);
>  		break;
>  
> +	case OP_WRITE_UNCACHED:
> +		TRIM_OFF_LEN(offset, size, maxfilelen);
> +		dowrite_uncached(offset, size);
> +		break;
> +
>  	case OP_MAPREAD:
>  		TRIM_OFF_LEN(offset, size, file_size);
>  		domapread(offset, size);
> @@ -2702,7 +2746,7 @@ uring_setup()
>  }
>  
>  int
> -uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
> +uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset, int flags)
>  {
>  	struct io_uring_sqe     *sqe;
>  	struct io_uring_cqe     *cqe;
> @@ -2733,6 +2777,7 @@ uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
>  		} else {
>  			io_uring_prep_writev(sqe, fd, &iovec, 1, o);
>  		}
> +		sqe->rw_flags = flags;
>  
>  		ret = io_uring_submit_and_wait(&ring, 1);
>  		if (ret != 1) {
> @@ -2781,7 +2826,7 @@ uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
>  }
>  #else
>  int
> -uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
> +uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset, int flags)
>  {
>  	fprintf(stderr, "io_rw: need IO_URING support!\n");
>  	exit(111);
> @@ -2789,19 +2834,21 @@ uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
>  #endif
>  
>  int
> -fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
> +fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset, int flags)
>  {
>  	int ret;
>  
>  	if (aio) {
>  		ret = aio_rw(rw, fd, buf, len, offset);
>  	} else if (uring) {
> -		ret = uring_rw(rw, fd, buf, len, offset);
> +		ret = uring_rw(rw, fd, buf, len, offset, flags);
>  	} else {
> +		struct iovec iov = { .iov_base = buf, .iov_len = len };
> +
>  		if (rw == READ)
> -			ret = read(fd, buf, len);
> +			ret = preadv2(fd, &iov, 1, offset, flags);
>  		else
> -			ret = write(fd, buf, len);
> +			ret = pwritev2(fd, &iov, 1, offset, flags);
>  	}
>  	return ret;
>  }
> 
> 
> -- 
> Jens Axboe
> 





[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux