Re: [RFC PATCH 32/35] netfs: Add some more RMW support for ceph

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, 2025-03-13 at 23:33 +0000, David Howells wrote:
> Add some support for RMW in ceph:
> 
>  (1) Add netfs_unbuffered_read_from_inode() to allow reading from an inode
>      without having a file pointer so that truncate can modify a
>      now-partial tail block of a content-encrypted file.
> 
>      This takes an additional argument to cause it to fail or give a short
>      read if a hole is encountered.  This is noted on the request with
>      NETFS_RREQ_NO_READ_HOLE for the filesystem to pick up.
> 
>  (2) Set NETFS_RREQ_RMW when doing an RMW as part of a request.
> 
>  (3) Provide a ->rmw_read_done() op for netfslib to tell the filesystem
>      that it has completed the read required for RMW.
> 
> Signed-off-by: David Howells <dhowells@xxxxxxxxxx>
> cc: Jeff Layton <jlayton@xxxxxxxxxx>
> cc: Viacheslav Dubeyko <slava@xxxxxxxxxxx>
> cc: Alex Markuze <amarkuze@xxxxxxxxxx>
> cc: Ilya Dryomov <idryomov@xxxxxxxxx>
> cc: ceph-devel@xxxxxxxxxxxxxxx
> cc: linux-fsdevel@xxxxxxxxxxxxxxx
> ---
>  fs/netfs/direct_read.c       | 75 ++++++++++++++++++++++++++++++++++++
>  fs/netfs/direct_write.c      |  1 +
>  fs/netfs/main.c              |  1 +
>  fs/netfs/objects.c           |  1 +
>  fs/netfs/read_collect.c      |  2 +
>  fs/netfs/write_retry.c       |  3 ++
>  include/linux/netfs.h        |  7 ++++
>  include/trace/events/netfs.h |  3 ++
>  8 files changed, 93 insertions(+)
> 
> diff --git a/fs/netfs/direct_read.c b/fs/netfs/direct_read.c
> index 5e4bd1e5a378..4061f934dfe6 100644
> --- a/fs/netfs/direct_read.c
> +++ b/fs/netfs/direct_read.c
> @@ -373,3 +373,78 @@ ssize_t netfs_unbuffered_read_iter(struct kiocb *iocb, struct iov_iter *iter)
>  	return ret;
>  }
>  EXPORT_SYMBOL(netfs_unbuffered_read_iter);
> +
> +/**
> + * netfs_unbuffered_read_from_inode - Perform an unbuffered sync I/O read
> + * @inode: The inode being accessed
> + * @pos: The file position to read from
> + * @iter: The output buffer (also specifies read length)
> + * @nohole: True to return short/ENODATA if hole encountered
> + *
> + * Perform a synchronous unbuffered I/O from the inode to the output buffer.
> + * No use is made of the pagecache.  The output buffer must be suitably aligned
> + * if content encryption is to be used.  If @nohole is true then the read will
> + * stop short if a hole is encountered and return -ENODATA if the read begins
> + * with a hole.
> + *
> + * The caller must hold any appropriate locks.
> + */
> +ssize_t netfs_unbuffered_read_from_inode(struct inode *inode, loff_t pos,
> +					 struct iov_iter *iter, bool nohole)
> +{
> +	struct netfs_io_request *rreq;
> +	ssize_t ret;
> +	size_t orig_count = iov_iter_count(iter);
> +
> +	_enter("");
> +
> +	if (WARN_ON(user_backed_iter(iter)))
> +		return -EIO;
> +
> +	if (!orig_count)
> +		return 0; /* Don't update atime */
> +
> +	ret = filemap_write_and_wait_range(inode->i_mapping, pos, orig_count);
> +	if (ret < 0)
> +		return ret;
> +	inode_update_time(inode, S_ATIME);
> +
> +	rreq = netfs_alloc_request(inode->i_mapping, NULL, pos, orig_count,
> +				   NULL, NETFS_UNBUFFERED_READ);
> +	if (IS_ERR(rreq))
> +		return PTR_ERR(rreq);
> +
> +	ret = -EIO;
> +	if (test_bit(NETFS_RREQ_CONTENT_ENCRYPTION, &rreq->flags) &&
> +	    WARN_ON(!netfs_is_crypto_aligned(rreq, iter)))
> +		goto out;
> +
> +	netfs_stat(&netfs_n_rh_dio_read);
> +	trace_netfs_read(rreq, rreq->start, rreq->len,
> +			 netfs_read_trace_unbuffered_read_from_inode);
> +
> +	rreq->buffer.iter	= *iter;

struct iov_iter is a fairly complex structure, and here it is assigned by value
to rreq->buffer.iter. As a result, the iterator that the caller's pointer refers
to will not see any changes made to that copy. Is that the desired behavior here?

Thanks,
Slava.

> +	rreq->len		= orig_count;
> +	rreq->direct_bv_unpin	= false;
> +	iov_iter_advance(iter, orig_count);
> +
> +	if (nohole)
> +		__set_bit(NETFS_RREQ_NO_READ_HOLE, &rreq->flags);
> +
> +	/* We're going to do the crypto in place in the destination buffer. */
> +	if (test_bit(NETFS_RREQ_CONTENT_ENCRYPTION, &rreq->flags))
> +		__set_bit(NETFS_RREQ_CRYPT_IN_PLACE, &rreq->flags);
> +
> +	ret = netfs_dispatch_unbuffered_reads(rreq);
> +
> +	if (!rreq->submitted) {
> +		netfs_put_request(rreq, false, netfs_rreq_trace_put_no_submit);
> +		goto out;
> +	}
> +
> +	ret = netfs_wait_for_read(rreq);
> +out:
> +	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
> +	return ret;
> +}
> +EXPORT_SYMBOL(netfs_unbuffered_read_from_inode);
> diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c
> index 83c5c06c4710..a99722f90c71 100644
> --- a/fs/netfs/direct_write.c
> +++ b/fs/netfs/direct_write.c
> @@ -145,6 +145,7 @@ static ssize_t netfs_write_through_bounce_buffer(struct netfs_io_request *wreq,
>  		wreq->start		= gstart;
>  		wreq->len		= gend - gstart;
>  
> +		__set_bit(NETFS_RREQ_RMW, &ictx->flags);
>  		if (gstart >= end) {
>  			/* At or after EOF, nothing to read. */
>  		} else {
> diff --git a/fs/netfs/main.c b/fs/netfs/main.c
> index 07f8cffbda8c..0900dea53e4a 100644
> --- a/fs/netfs/main.c
> +++ b/fs/netfs/main.c
> @@ -39,6 +39,7 @@ static const char *netfs_origins[nr__netfs_io_origin] = {
>  	[NETFS_READ_GAPS]		= "RG",
>  	[NETFS_READ_SINGLE]		= "R1",
>  	[NETFS_READ_FOR_WRITE]		= "RW",
> +	[NETFS_UNBUFFERED_READ]		= "UR",
>  	[NETFS_DIO_READ]		= "DR",
>  	[NETFS_WRITEBACK]		= "WB",
>  	[NETFS_WRITEBACK_SINGLE]	= "W1",
> diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c
> index 4606e830c116..958c4d460d07 100644
> --- a/fs/netfs/objects.c
> +++ b/fs/netfs/objects.c
> @@ -60,6 +60,7 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
>  	    origin == NETFS_READ_GAPS ||
>  	    origin == NETFS_READ_SINGLE ||
>  	    origin == NETFS_READ_FOR_WRITE ||
> +	    origin == NETFS_UNBUFFERED_READ ||
>  	    origin == NETFS_DIO_READ) {
>  		INIT_WORK(&rreq->work, netfs_read_collection_worker);
>  		rreq->io_streams[0].avail = true;
> diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c
> index 0a0bff90ca9e..013a90738dcd 100644
> --- a/fs/netfs/read_collect.c
> +++ b/fs/netfs/read_collect.c
> @@ -462,6 +462,7 @@ static void netfs_read_collection(struct netfs_io_request *rreq)
>  	//netfs_rreq_is_still_valid(rreq);
>  
>  	switch (rreq->origin) {
> +	case NETFS_UNBUFFERED_READ:
>  	case NETFS_DIO_READ:
>  	case NETFS_READ_GAPS:
>  	case NETFS_RMW_READ:
> @@ -681,6 +682,7 @@ ssize_t netfs_wait_for_read(struct netfs_io_request *rreq)
>  	if (ret == 0) {
>  		ret = rreq->transferred;
>  		switch (rreq->origin) {
> +		case NETFS_UNBUFFERED_READ:
>  		case NETFS_DIO_READ:
>  		case NETFS_READ_SINGLE:
>  			ret = rreq->transferred;
> diff --git a/fs/netfs/write_retry.c b/fs/netfs/write_retry.c
> index f727b48e2bfe..9e4e79d5a403 100644
> --- a/fs/netfs/write_retry.c
> +++ b/fs/netfs/write_retry.c
> @@ -386,6 +386,9 @@ ssize_t netfs_rmw_read(struct netfs_io_request *wreq, struct file *file,
>  		ret = 0;
>  	}
>  
> +	if (ret == 0 && rreq->netfs_ops->rmw_read_done)
> +		rreq->netfs_ops->rmw_read_done(wreq, rreq);
> +
>  error:
>  	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
>  	return ret;
> diff --git a/include/linux/netfs.h b/include/linux/netfs.h
> index 9d17d4bd9753..4049c985b9b4 100644
> --- a/include/linux/netfs.h
> +++ b/include/linux/netfs.h
> @@ -220,6 +220,7 @@ enum netfs_io_origin {
>  	NETFS_READ_GAPS,		/* This read is a synchronous read to fill gaps */
>  	NETFS_READ_SINGLE,		/* This read should be treated as a single object */
>  	NETFS_READ_FOR_WRITE,		/* This read is to prepare a write */
> +	NETFS_UNBUFFERED_READ,		/* This is an unbuffered I/O read */
>  	NETFS_DIO_READ,			/* This is a direct I/O read */
>  	NETFS_WRITEBACK,		/* This write was triggered by writepages */
>  	NETFS_WRITEBACK_SINGLE,		/* This monolithic write was triggered by writepages */
> @@ -308,6 +309,9 @@ struct netfs_io_request {
>  #define NETFS_RREQ_CONTENT_ENCRYPTION	16	/* Content encryption is in use */
>  #define NETFS_RREQ_CRYPT_IN_PLACE	17	/* Do decryption in place */
>  #define NETFS_RREQ_PUT_RMW_TAIL		18	/* Need to put ->rmw_tail */
> +#define NETFS_RREQ_RMW			19	/* Performing RMW cycle */
> +#define NETFS_RREQ_REPEAT_RMW		20	/* Need to perform an RMW cycle */
> +#define NETFS_RREQ_NO_READ_HOLE		21	/* Give short read/error if hole encountered */
>  #define NETFS_RREQ_USE_PGPRIV2		31	/* [DEPRECATED] Use PG_private_2 to mark
>  						 * write to cache on read */
>  	const struct netfs_request_ops *netfs_ops;
> @@ -336,6 +340,7 @@ struct netfs_request_ops {
>  	/* Modification handling */
>  	void (*update_i_size)(struct inode *inode, loff_t i_size);
>  	void (*post_modify)(struct inode *inode, void *fs_priv);
> +	void (*rmw_read_done)(struct netfs_io_request *wreq, struct netfs_io_request *rreq);
>  
>  	/* Write request handling */
>  	void (*begin_writeback)(struct netfs_io_request *wreq);
> @@ -432,6 +437,8 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i
>  ssize_t netfs_unbuffered_read_iter(struct kiocb *iocb, struct iov_iter *iter);
>  ssize_t netfs_buffered_read_iter(struct kiocb *iocb, struct iov_iter *iter);
>  ssize_t netfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter);
> +ssize_t netfs_unbuffered_read_from_inode(struct inode *inode, loff_t pos,
> +					 struct iov_iter *iter, bool nohole);
>  
>  /* High-level write API */
>  ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
> diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h
> index 74af82d773bd..9254c6f0e604 100644
> --- a/include/trace/events/netfs.h
> +++ b/include/trace/events/netfs.h
> @@ -23,6 +23,7 @@
>  	EM(netfs_read_trace_read_gaps,		"READ-GAPS")	\
>  	EM(netfs_read_trace_read_single,	"READ-SNGL")	\
>  	EM(netfs_read_trace_prefetch_for_write,	"PREFETCHW")	\
> +	EM(netfs_read_trace_unbuffered_read_from_inode, "READ-INOD") \
>  	E_(netfs_read_trace_write_begin,	"WRITEBEGN")
>  
>  #define netfs_write_traces					\
> @@ -38,6 +39,7 @@
>  	EM(NETFS_READ_GAPS,			"RG")		\
>  	EM(NETFS_READ_SINGLE,			"R1")		\
>  	EM(NETFS_READ_FOR_WRITE,		"RW")		\
> +	EM(NETFS_UNBUFFERED_READ,		"UR")		\
>  	EM(NETFS_DIO_READ,			"DR")		\
>  	EM(NETFS_WRITEBACK,			"WB")		\
>  	EM(NETFS_WRITEBACK_SINGLE,		"W1")		\
> @@ -104,6 +106,7 @@
>  	EM(netfs_sreq_trace_io_progress,	"IO   ")	\
>  	EM(netfs_sreq_trace_limited,		"LIMIT")	\
>  	EM(netfs_sreq_trace_need_clear,		"N-CLR")	\
> +	EM(netfs_sreq_trace_need_rmw,		"N-RMW")	\
>  	EM(netfs_sreq_trace_partial_read,	"PARTR")	\
>  	EM(netfs_sreq_trace_need_retry,		"ND-RT")	\
>  	EM(netfs_sreq_trace_pending,		"PEND ")	\
> 
> 





[Index of Archives]     [CEPH Users]     [Ceph Large]     [Ceph Dev]     [Information on CEPH]     [Linux BTRFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux