Re: [PATCH v3 3/6] NFSD: Add READ_PLUS data support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Chuck,

On Mon, Aug 3, 2020 at 3:19 PM Chuck Lever <chuck.lever@xxxxxxxxxx> wrote:
>
> Hi Anna-
>
> > On Aug 3, 2020, at 12:59 PM, schumaker.anna@xxxxxxxxx wrote:
> >
> > From: Anna Schumaker <Anna.Schumaker@xxxxxxxxxx>
> >
> > This patch adds READ_PLUS support for returning a single
> > NFS4_CONTENT_DATA segment to the client. This is basically the same as
> > the READ operation, only with the extra information about data segments.
> >
> > Signed-off-by: Anna Schumaker <Anna.Schumaker@xxxxxxxxxx>
> > ---
> > fs/nfsd/nfs4proc.c | 17 ++++++++++
> > fs/nfsd/nfs4xdr.c  | 85 ++++++++++++++++++++++++++++++++++++++++++++--
> > 2 files changed, 100 insertions(+), 2 deletions(-)
> >
> > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> > index a09c35f0f6f0..9630d33211f2 100644
> > --- a/fs/nfsd/nfs4proc.c
> > +++ b/fs/nfsd/nfs4proc.c
> > @@ -2523,6 +2523,16 @@ static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
> >       return (op_encode_hdr_size + 2 + XDR_QUADLEN(rlen)) * sizeof(__be32);
> > }
> >
> > +static inline u32 nfsd4_read_plus_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
> > +{
> > +     u32 maxcount = svc_max_payload(rqstp);
> > +     u32 rlen = min(op->u.read.rd_length, maxcount);
> > +     /* enough extra xdr space for encoding either a hole or data segment. */
> > +     u32 segments = 1 + 2 + 2;
> > +
> > +     return (op_encode_hdr_size + 2 + segments + XDR_QUADLEN(rlen)) * sizeof(__be32);
> > +}
> > +
> > static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
> > {
> >       u32 maxcount = 0, rlen = 0;
> > @@ -3059,6 +3069,13 @@ static const struct nfsd4_operation nfsd4_ops[] = {
> >               .op_name = "OP_COPY",
> >               .op_rsize_bop = nfsd4_copy_rsize,
> >       },
> > +     [OP_READ_PLUS] = {
> > +             .op_func = nfsd4_read,
> > +             .op_release = nfsd4_read_release,
> > +             .op_name = "OP_READ_PLUS",
> > +             .op_rsize_bop = nfsd4_read_plus_rsize,
> > +             .op_get_currentstateid = nfsd4_get_readstateid,
> > +     },
> >       [OP_SEEK] = {
> >               .op_func = nfsd4_seek,
> >               .op_name = "OP_SEEK",
> > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> > index 6a1c0a7fae05..1d143bf02b83 100644
> > --- a/fs/nfsd/nfs4xdr.c
> > +++ b/fs/nfsd/nfs4xdr.c
> > @@ -1957,7 +1957,7 @@ static const nfsd4_dec nfsd4_dec_ops[] = {
> >       [OP_LAYOUTSTATS]        = (nfsd4_dec)nfsd4_decode_notsupp,
> >       [OP_OFFLOAD_CANCEL]     = (nfsd4_dec)nfsd4_decode_offload_status,
> >       [OP_OFFLOAD_STATUS]     = (nfsd4_dec)nfsd4_decode_offload_status,
> > -     [OP_READ_PLUS]          = (nfsd4_dec)nfsd4_decode_notsupp,
> > +     [OP_READ_PLUS]          = (nfsd4_dec)nfsd4_decode_read,
> >       [OP_SEEK]               = (nfsd4_dec)nfsd4_decode_seek,
> >       [OP_WRITE_SAME]         = (nfsd4_dec)nfsd4_decode_notsupp,
> >       [OP_CLONE]              = (nfsd4_dec)nfsd4_decode_clone,
> > @@ -4367,6 +4367,87 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr,
> >               return nfserr_resource;
> >       p = xdr_encode_hyper(p, os->count);
> >       *p++ = cpu_to_be32(0);
> > +     return nfserr;
> > +}
> > +
> > +static __be32
> > +nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
> > +                         struct nfsd4_read *read,
> > +                         unsigned long maxcount,  u32 *eof)
> > +{
> > +     struct xdr_stream *xdr = &resp->xdr;
> > +     struct file *file = read->rd_nf->nf_file;
> > +     int starting_len = xdr->buf->len;
> > +     __be32 nfserr;
> > +     __be32 *p, tmp;
> > +     __be64 tmp64;
> > +
> > +     /* Content type, offset, byte count */
> > +     p = xdr_reserve_space(xdr, 4 + 8 + 4);
> > +     if (!p)
> > +             return nfserr_resource;
> > +
> > +     read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, maxcount);
> > +     if (read->rd_vlen < 0)
> > +             return nfserr_resource;
> > +
> > +     nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset,
> > +                         resp->rqstp->rq_vec, read->rd_vlen, &maxcount, eof);
>
> nfsd4_encode_read() has this:
>
> 3904         if (file->f_op->splice_read &&
> 3905             test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags))
> 3906                 nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
> 3907         else
> 3908                 nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
>
> So READ_PLUS never uses ->splice_read.
>
> readv is known to be less efficient than splice. Is there a measurable
> server performance impact (either latency or CPU utilization) when
> reading a file with no holes?

Good question, and I haven't measured that. For a while I was doing
the first read data segment with a splice, but that was having issues.
I should circle back and see if that's still a problem now that I've
fixed a bunch of other bugs.

>
>
> > +     if (nfserr)
> > +             return nfserr;
> > +     if (svc_encode_read_payload(resp->rqstp, starting_len + 16, maxcount))
> > +             return nfserr_io;
>
> Not sure you want a read_payload call here. This is to notify the
> transport that there is a section of the message that can be sent
> via RDMA, but READ_PLUS has no DDP-eligible data items; especially
> not holes!

Sure thing. I'll remove that for the next version.

Anna
>
> Although, the call is not likely to be much more than a no-op,
> since clients won't be providing any Write chunks for READ_PLUS.
>
>
> > +
> > +     tmp = htonl(NFS4_CONTENT_DATA);
> > +     write_bytes_to_xdr_buf(xdr->buf, starting_len,      &tmp,   4);
> > +     tmp64 = cpu_to_be64(read->rd_offset);
> > +     write_bytes_to_xdr_buf(xdr->buf, starting_len + 4,  &tmp64, 8);
> > +     tmp = htonl(maxcount);
> > +     write_bytes_to_xdr_buf(xdr->buf, starting_len + 12, &tmp,   4);
> > +     return nfs_ok;
> > +}
> > +
> > +static __be32
> > +nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
> > +                    struct nfsd4_read *read)
> > +{
> > +     unsigned long maxcount;
> > +     struct xdr_stream *xdr = &resp->xdr;
> > +     struct file *file;
> > +     int starting_len = xdr->buf->len;
> > +     int segments = 0;
> > +     __be32 *p, tmp;
> > +     u32 eof;
> > +
> > +     if (nfserr)
> > +             return nfserr;
> > +     file = read->rd_nf->nf_file;
> > +
> > +     /* eof flag, segment count */
> > +     p = xdr_reserve_space(xdr, 4 + 4);
> > +     if (!p)
> > +             return nfserr_resource;
> > +     xdr_commit_encode(xdr);
> > +
> > +     maxcount = svc_max_payload(resp->rqstp);
> > +     maxcount = min_t(unsigned long, maxcount,
> > +                      (xdr->buf->buflen - xdr->buf->len));
> > +     maxcount = min_t(unsigned long, maxcount, read->rd_length);
> > +
> > +     eof = read->rd_offset >= i_size_read(file_inode(file));
> > +     if (!eof) {
> > +             nfserr = nfsd4_encode_read_plus_data(resp, read, maxcount, &eof);
> > +             segments++;
> > +     }
> > +
> > +     if (nfserr)
> > +             xdr_truncate_encode(xdr, starting_len);
> > +     else {
> > +             tmp = htonl(eof);
> > +             write_bytes_to_xdr_buf(xdr->buf, starting_len,     &tmp, 4);
> > +             tmp = htonl(segments);
> > +             write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4);
> > +     }
> >
> >       return nfserr;
> > }
> > @@ -4509,7 +4590,7 @@ static const nfsd4_enc nfsd4_enc_ops[] = {
> >       [OP_LAYOUTSTATS]        = (nfsd4_enc)nfsd4_encode_noop,
> >       [OP_OFFLOAD_CANCEL]     = (nfsd4_enc)nfsd4_encode_noop,
> >       [OP_OFFLOAD_STATUS]     = (nfsd4_enc)nfsd4_encode_offload_status,
> > -     [OP_READ_PLUS]          = (nfsd4_enc)nfsd4_encode_noop,
> > +     [OP_READ_PLUS]          = (nfsd4_enc)nfsd4_encode_read_plus,
> >       [OP_SEEK]               = (nfsd4_enc)nfsd4_encode_seek,
> >       [OP_WRITE_SAME]         = (nfsd4_enc)nfsd4_encode_noop,
> >       [OP_CLONE]              = (nfsd4_enc)nfsd4_encode_noop,
> > --
> > 2.27.0
> >
>
> --
> Chuck Lever
>
>
>



[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux