Re: [PATCH v2 4/6] NFS: Add READ_PLUS data segment support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, 2020-02-14 at 17:28 -0500, Chuck Lever wrote:
> > On Feb 14, 2020, at 4:12 PM, schumaker.anna@xxxxxxxxx wrote:
> > 
> > From: Anna Schumaker <Anna.Schumaker@xxxxxxxxxx>
> > 
> > This patch adds client support for decoding a single NFS4_CONTENT_DATA
> > segment returned by the server. This is the simplest implementation
> > possible, since it does not account for any hole segments in the reply.
> > 
> > Signed-off-by: Anna Schumaker <Anna.Schumaker@xxxxxxxxxx>
> > ---
> > fs/nfs/nfs42xdr.c         | 138 ++++++++++++++++++++++++++++++++++++++
> > fs/nfs/nfs4proc.c         |  43 +++++++++++-
> > fs/nfs/nfs4xdr.c          |   1 +
> > include/linux/nfs4.h      |   2 +-
> > include/linux/nfs_fs_sb.h |   1 +
> > include/linux/nfs_xdr.h   |   2 +-
> > 6 files changed, 182 insertions(+), 5 deletions(-)
> > 
> > diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
> > index c03f3246d6c5..bf118ecabe2c 100644
> > --- a/fs/nfs/nfs42xdr.c
> > +++ b/fs/nfs/nfs42xdr.c
> > @@ -45,6 +45,15 @@
> > #define encode_deallocate_maxsz		(op_encode_hdr_maxsz + \
> > 					 encode_fallocate_maxsz)
> > #define decode_deallocate_maxsz		(op_decode_hdr_maxsz)
> > +#define encode_read_plus_maxsz		(op_encode_hdr_maxsz + \
> > +					 encode_stateid_maxsz + 3)
> > +#define NFS42_READ_PLUS_SEGMENT_SIZE	(1 /* data_content4 */ + \
> > +					 2 /* data_info4.di_offset */ + \
> > +					 2 /* data_info4.di_length */)
> > +#define decode_read_plus_maxsz		(op_decode_hdr_maxsz + \
> > +					 1 /* rpr_eof */ + \
> > +					 1 /* rpr_contents count */ + \
> > +					 NFS42_READ_PLUS_SEGMENT_SIZE)
> > #define encode_seek_maxsz		(op_encode_hdr_maxsz + \
> > 					 encode_stateid_maxsz + \
> > 					 2 /* offset */ + \
> > @@ -128,6 +137,14 @@
> > 					 decode_putfh_maxsz + \
> > 					 decode_deallocate_maxsz + \
> > 					 decode_getattr_maxsz)
> > +#define NFS4_enc_read_plus_sz		(compound_encode_hdr_maxsz + \
> > +					 encode_sequence_maxsz + \
> > +					 encode_putfh_maxsz + \
> > +					 encode_read_plus_maxsz)
> > +#define NFS4_dec_read_plus_sz		(compound_decode_hdr_maxsz + \
> > +					 decode_sequence_maxsz + \
> > +					 decode_putfh_maxsz + \
> > +					 decode_read_plus_maxsz)
> > #define NFS4_enc_seek_sz		(compound_encode_hdr_maxsz + \
> > 					 encode_sequence_maxsz + \
> > 					 encode_putfh_maxsz + \
> > @@ -252,6 +269,16 @@ static void encode_deallocate(struct xdr_stream *xdr,
> > 	encode_fallocate(xdr, args);
> > }
> > 
> > +static void encode_read_plus(struct xdr_stream *xdr,
> > +			     const struct nfs_pgio_args *args,
> > +			     struct compound_hdr *hdr)
> > +{
> > +	encode_op_hdr(xdr, OP_READ_PLUS, decode_read_plus_maxsz, hdr);
> > +	encode_nfs4_stateid(xdr, &args->stateid);
> > +	encode_uint64(xdr, args->offset);
> > +	encode_uint32(xdr, args->count);
> > +}
> > +
> > static void encode_seek(struct xdr_stream *xdr,
> > 			const struct nfs42_seek_args *args,
> > 			struct compound_hdr *hdr)
> > @@ -446,6 +473,29 @@ static void nfs4_xdr_enc_deallocate(struct rpc_rqst
> > *req,
> > 	encode_nops(&hdr);
> > }
> > 
> > +/*
> > + * Encode READ_PLUS request
> > + */
> > +static void nfs4_xdr_enc_read_plus(struct rpc_rqst *req,
> > +				   struct xdr_stream *xdr,
> > +				   const void *data)
> > +{
> > +	const struct nfs_pgio_args *args = data;
> > +	struct compound_hdr hdr = {
> > +		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
> > +	};
> > +
> > +	encode_compound_hdr(xdr, req, &hdr);
> > +	encode_sequence(xdr, &args->seq_args, &hdr);
> > +	encode_putfh(xdr, args->fh, &hdr);
> > +	encode_read_plus(xdr, args, &hdr);
> > +
> > +	rpc_prepare_reply_pages(req, args->pages, args->pgbase,
> > +				args->count, hdr.replen);
> > +	req->rq_rcv_buf.flags |= XDRBUF_READ;
> 
> IMO this line is incorrect.

You're right, this line causes problems for RDMA with READ_PLUS. I added it to
match how the other xdr read encoders were set up
> 
> RFC 8267 Section 6.1 does not list any part of the result of READ_PLUS
> as DDP-eligible. There's no way for a client to know how to set up
> Write chunks, unless it knows exactly where the file's holes are in
> advance. Even then... racy.
> 
> Just curious, have you tried READ_PLUS with proto=rdma ?

I haven't done in-depth performance testing, but I have been able to run it.

Anna

> 
> 
> > +	encode_nops(&hdr);
> > +}
> > +
> > /*
> >  * Encode SEEK request
> >  */
> > @@ -694,6 +744,67 @@ static int decode_deallocate(struct xdr_stream *xdr,
> > struct nfs42_falloc_res *re
> > 	return decode_op_hdr(xdr, OP_DEALLOCATE);
> > }
> > 
> > +static uint32_t decode_read_plus_data(struct xdr_stream *xdr, struct
> > nfs_pgio_res *res,
> > +				      uint32_t *eof)
> > +{
> > +	__be32 *p;
> > +	uint32_t count, recvd;
> > +	uint64_t offset;
> > +
> > +	p = xdr_inline_decode(xdr, 8 + 4);
> > +	if (unlikely(!p))
> > +		return -EIO;
> > +
> > +	p = xdr_decode_hyper(p, &offset);
> > +	count = be32_to_cpup(p);
> > +	if (count == 0)
> > +		return 0;
> > +
> > +	recvd = xdr_read_pages(xdr, count);
> > +	if (count > recvd) {
> > +		dprintk("NFS: server cheating in read reply: "
> > +				"count %u > recvd %u\n", count, recvd);
> > +		count = recvd;
> > +		*eof = 0;
> > +	}
> > +
> > +	return count;
> > +}
> > +
> > +static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res
> > *res)
> > +{
> > +	__be32 *p;
> > +	uint32_t count, eof, segments, type;
> > +	int status;
> > +
> > +	status = decode_op_hdr(xdr, OP_READ_PLUS);
> > +	if (status)
> > +		return status;
> > +
> > +	p = xdr_inline_decode(xdr, 4 + 4);
> > +	if (unlikely(!p))
> > +		return -EIO;
> > +
> > +	eof = be32_to_cpup(p++);
> > +	segments = be32_to_cpup(p++);
> > +	if (segments == 0)
> > +		return 0;
> > +
> > +	p = xdr_inline_decode(xdr, 4);
> > +	if (unlikely(!p))
> > +		return -EIO;
> > +
> > +	type = be32_to_cpup(p++);
> > +	if (type == NFS4_CONTENT_DATA)
> > +		count = decode_read_plus_data(xdr, res, &eof);
> > +	else
> > +		return -EINVAL;
> > +
> > +	res->eof = eof;
> > +	res->count = count;
> > +	return 0;
> > +}
> > +
> > static int decode_seek(struct xdr_stream *xdr, struct nfs42_seek_res *res)
> > {
> > 	int status;
> > @@ -870,6 +981,33 @@ static int nfs4_xdr_dec_deallocate(struct rpc_rqst
> > *rqstp,
> > 	return status;
> > }
> > 
> > +/*
> > + * Decode READ_PLUS request
> > + */
> > +static int nfs4_xdr_dec_read_plus(struct rpc_rqst *rqstp,
> > +				  struct xdr_stream *xdr,
> > +				  void *data)
> > +{
> > +	struct nfs_pgio_res *res = data;
> > +	struct compound_hdr hdr;
> > +	int status;
> > +
> > +	status = decode_compound_hdr(xdr, &hdr);
> > +	if (status)
> > +		goto out;
> > +	status = decode_sequence(xdr, &res->seq_res, rqstp);
> > +	if (status)
> > +		goto out;
> > +	status = decode_putfh(xdr);
> > +	if (status)
> > +		goto out;
> > +	status = decode_read_plus(xdr, res);
> > +	if (!status)
> > +		status = res->count;
> > +out:
> > +	return status;
> > +}
> > +
> > /*
> >  * Decode SEEK request
> >  */
> > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> > index 95d07a3dc5d1..ed3ec8c36273 100644
> > --- a/fs/nfs/nfs4proc.c
> > +++ b/fs/nfs/nfs4proc.c
> > @@ -69,6 +69,10 @@
> > 
> > #include "nfs4trace.h"
> > 
> > +#ifdef CONFIG_NFS_V4_2
> > +#include "nfs42.h"
> > +#endif /* CONFIG_NFS_V4_2 */
> > +
> > #define NFSDBG_FACILITY		NFSDBG_PROC
> > 
> > #define NFS4_BITMASK_SZ		3
> > @@ -5199,28 +5203,60 @@ static bool nfs4_read_stateid_changed(struct
> > rpc_task *task,
> > 	return true;
> > }
> > 
> > +static bool nfs4_read_plus_not_supported(struct rpc_task *task,
> > +					 struct nfs_pgio_header *hdr)
> > +{
> > +	struct nfs_server *server = NFS_SERVER(hdr->inode);
> > +	struct rpc_message *msg = &task->tk_msg;
> > +
> > +	if (msg->rpc_proc == &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS] &&
> > +	    server->caps & NFS_CAP_READ_PLUS && task->tk_status == -ENOTSUPP) {
> > +		server->caps &= ~NFS_CAP_READ_PLUS;
> > +		msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
> > +		rpc_restart_call_prepare(task);
> > +		return true;
> > +	}
> > +	return false;
> > +}
> > +
> > static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header
> > *hdr)
> > {
> > -
> > 	dprintk("--> %s\n", __func__);
> > 
> > 	if (!nfs4_sequence_done(task, &hdr->res.seq_res))
> > 		return -EAGAIN;
> > 	if (nfs4_read_stateid_changed(task, &hdr->args))
> > 		return -EAGAIN;
> > +	if (nfs4_read_plus_not_supported(task, hdr))
> > +		return -EAGAIN;
> > 	if (task->tk_status > 0)
> > 		nfs_invalidate_atime(hdr->inode);
> > 	return hdr->pgio_done_cb ? hdr->pgio_done_cb(task, hdr) :
> > 				    nfs4_read_done_cb(task, hdr);
> > }
> > 
> > +#ifdef CONFIG_NFS_V4_2
> > +static void nfs42_read_plus_support(struct nfs_server *server, struct
> > rpc_message *msg)
> > +{
> > +	if (server->caps & NFS_CAP_READ_PLUS)
> > +		msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS];
> > +	else
> > +		msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
> > +}
> > +#else
> > +static void nfs42_read_plus_support(struct nfs_server *server, struct
> > rpc_message *msg)
> > +{
> > +	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
> > +}
> > +#endif /* CONFIG_NFS_V4_2 */
> > +
> > static void nfs4_proc_read_setup(struct nfs_pgio_header *hdr,
> > 				 struct rpc_message *msg)
> > {
> > 	hdr->timestamp   = jiffies;
> > 	if (!hdr->pgio_done_cb)
> > 		hdr->pgio_done_cb = nfs4_read_done_cb;
> > -	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
> > +	nfs42_read_plus_support(NFS_SERVER(hdr->inode), msg);
> > 	nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0, 0);
> > }
> > 
> > @@ -9970,7 +10006,8 @@ static const struct nfs4_minor_version_ops
> > nfs_v4_2_minor_ops = {
> > 		| NFS_CAP_SEEK
> > 		| NFS_CAP_LAYOUTSTATS
> > 		| NFS_CAP_CLONE
> > -		| NFS_CAP_LAYOUTERROR,
> > +		| NFS_CAP_LAYOUTERROR
> > +		| NFS_CAP_READ_PLUS,
> > 	.init_client = nfs41_init_client,
> > 	.shutdown_client = nfs41_shutdown_client,
> > 	.match_stateid = nfs41_match_stateid,
> > diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
> > index 47817ef0aadb..68b2917d0537 100644
> > --- a/fs/nfs/nfs4xdr.c
> > +++ b/fs/nfs/nfs4xdr.c
> > @@ -7584,6 +7584,7 @@ const struct rpc_procinfo nfs4_procedures[] = {
> > 	PROC42(COPY_NOTIFY,	enc_copy_notify,	dec_copy_notify),
> > 	PROC(LOOKUPP,		enc_lookupp,		dec_lookupp),
> > 	PROC42(LAYOUTERROR,	enc_layouterror,	dec_layouterror),
> > +	PROC42(READ_PLUS,	enc_read_plus,		dec_read_plus),
> > };
> > 
> > static unsigned int nfs_version4_counts[ARRAY_SIZE(nfs4_procedures)];
> > diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
> > index 82d8fb422092..c1eeef52545c 100644
> > --- a/include/linux/nfs4.h
> > +++ b/include/linux/nfs4.h
> > @@ -540,8 +540,8 @@ enum {
> > 
> > 	NFSPROC4_CLNT_LOOKUPP,
> > 	NFSPROC4_CLNT_LAYOUTERROR,
> > -
> > 	NFSPROC4_CLNT_COPY_NOTIFY,
> > +	NFSPROC4_CLNT_READ_PLUS,
> > };
> > 
> > /* nfs41 types */
> > diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
> > index 465fa98258a3..11248c5a7b24 100644
> > --- a/include/linux/nfs_fs_sb.h
> > +++ b/include/linux/nfs_fs_sb.h
> > @@ -281,5 +281,6 @@ struct nfs_server {
> > #define NFS_CAP_OFFLOAD_CANCEL	(1U << 25)
> > #define NFS_CAP_LAYOUTERROR	(1U << 26)
> > #define NFS_CAP_COPY_NOTIFY	(1U << 27)
> > +#define NFS_CAP_READ_PLUS	(1U << 28)
> > 
> > #endif
> > diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
> > index 94c77ed55ce1..8efbf3d8b263 100644
> > --- a/include/linux/nfs_xdr.h
> > +++ b/include/linux/nfs_xdr.h
> > @@ -655,7 +655,7 @@ struct nfs_pgio_args {
> > struct nfs_pgio_res {
> > 	struct nfs4_sequence_res	seq_res;
> > 	struct nfs_fattr *	fattr;
> > -	__u32			count;
> > +	__u64			count;
> > 	__u32			op_status;
> > 	union {
> > 		struct {
> > -- 
> > 2.25.0
> > 
> 
> --
> Chuck Lever
> 
> 
> 




[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux