Re: [PATCH v2 1/1] NFSD: Simplify READ_PLUS

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




> On Sep 7, 2022, at 4:37 PM, Anna Schumaker <anna@xxxxxxxxxx> wrote:
> 
> On Wed, Sep 7, 2022 at 4:29 PM Chuck Lever III <chuck.lever@xxxxxxxxxx> wrote:
>> 
>> Be sure to Cc: Jeff on these. Thanks!
>> 
>> 
>>> On Sep 7, 2022, at 3:52 PM, Anna Schumaker <anna@xxxxxxxxxx> wrote:
>>> 
>>> From: Anna Schumaker <Anna.Schumaker@xxxxxxxxxx>
>>> 
>>> Chuck had suggested reverting READ_PLUS so it returns a single DATA
>>> segment covering the requested read range. This prepares the server for
>>> a future "sparse read" function so support can easily be added without
>>> needing to rip out the old READ_PLUS code at the same time.
>>> 
>>> Signed-off-by: Anna Schumaker <Anna.Schumaker@xxxxxxxxxx>
>>> ---
>>> fs/nfsd/nfs4xdr.c | 139 +++++++++++-----------------------------------
>>> 1 file changed, 32 insertions(+), 107 deletions(-)
>>> 
>>> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
>>> index 1e9690a061ec..bcc8c385faf2 100644
>>> --- a/fs/nfsd/nfs4xdr.c
>>> +++ b/fs/nfsd/nfs4xdr.c
>>> @@ -4731,79 +4731,37 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr,
>>> 
>>> static __be32
>>> nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
>>> -                         struct nfsd4_read *read,
>>> -                         unsigned long *maxcount, u32 *eof,
>>> -                         loff_t *pos)
>>> +                         struct nfsd4_read *read)
>>> {
>>> -     struct xdr_stream *xdr = resp->xdr;
>>> +     bool splice_ok = test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags);
>>>      struct file *file = read->rd_nf->nf_file;
>>> -     int starting_len = xdr->buf->len;
>>> -     loff_t hole_pos;
>>> -     __be32 nfserr;
>>> -     __be32 *p, tmp;
>>> -     __be64 tmp64;
>>> -
>>> -     hole_pos = pos ? *pos : vfs_llseek(file, read->rd_offset, SEEK_HOLE);
>>> -     if (hole_pos > read->rd_offset)
>>> -             *maxcount = min_t(unsigned long, *maxcount, hole_pos - read->rd_offset);
>>> -     *maxcount = min_t(unsigned long, *maxcount, (xdr->buf->buflen - xdr->buf->len));
>>> +     struct xdr_stream *xdr = resp->xdr;
>>> +     unsigned long maxcount;
>>> +     __be32 nfserr, *p;
>>> 
>>>      /* Content type, offset, byte count */
>>>      p = xdr_reserve_space(xdr, 4 + 8 + 4);
>>>      if (!p)
>>> -             return nfserr_resource;
>>> +             return nfserr_io;
>> 
>> Wouldn't nfserr_rep_too_big be a more appropriate status for running
>> off the end of the send buffer? I'm not 100% sure, but I would expect
>> that exhausting send buffer space would imply the reply has grown too
>> large.
> 
> I can switch it to that, no problem.

OK, never mind. I see that nfsd4_encode_compound() handles the status
code conversion for every encoder, and deals with reply caching too:

5349         if (op->status == nfserr_resource && nfsd4_has_session(&resp->cstate)) {
5350                 struct nfsd4_slot *slot = resp->cstate.slot;
5351 
5352                 if (slot->sl_flags & NFSD4_SLOT_CACHETHIS)
5353                         op->status = nfserr_rep_too_big_to_cache;
5354                 else
5355                         op->status = nfserr_rep_too_big;
5356         }

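So returning nfserr_resource from the READ_PLUS encoder when
xdr_reserve_space() fails is fine, and is in fact preferred: for
session-based clients, nfsd4_encode_compound() converts it to
nfserr_rep_too_big or nfserr_rep_too_big_to_cache as appropriate.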

Then, once READ_PLUS can return multiple segments again, it should
deal with send buffer space exhaustion by truncating the reply at
the last properly encoded segment, as Trond suggested.


>>> +     if (resp->xdr->buf->page_len && splice_ok) {
>>> +             WARN_ON_ONCE(splice_ok);
>>> +             return nfserr_io;
>>> +     }
>> 
>> I wish I understood why this test was needed. It seems to have been
>> copied and pasted from historic code into nfsd4_encode_read(), and
>> there have been recent mechanical changes to it, but there's no
>> comment explaining it there...
> 
> Yeah, I saw this was in the read code and assumed it was an important
> check so I added it here too.
>> 
>> In any event, this seems to be checking for a server software bug,
>> so maybe this should return nfserr_serverfault. Oddly that status
>> code isn't defined yet.
> 
> Do you want me to add that code and return it in this patch?

I would still like this code to return nfserr_serverfault if the
splice check fails.


>> Do you have some performance results for v2?
> 
> Not yet, I have it running now so hopefully I'll have something ready
> by tomorrow morning.
> 
> Anna
>> 
>> 
>>> -     read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, *maxcount);
>>> -     if (read->rd_vlen < 0)
>>> -             return nfserr_resource;
>>> +     maxcount = min_t(unsigned long, read->rd_length,
>>> +                      (xdr->buf->buflen - xdr->buf->len));
>>> 
>>> -     nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset,
>>> -                         resp->rqstp->rq_vec, read->rd_vlen, maxcount, eof);
>>> +     if (file->f_op->splice_read && splice_ok)
>>> +             nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
>>> +     else
>>> +             nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
>>>      if (nfserr)
>>>              return nfserr;
>>> -     xdr_truncate_encode(xdr, starting_len + 16 + xdr_align_size(*maxcount));
>>> 
>>> -     tmp = htonl(NFS4_CONTENT_DATA);
>>> -     write_bytes_to_xdr_buf(xdr->buf, starting_len,      &tmp,   4);
>>> -     tmp64 = cpu_to_be64(read->rd_offset);
>>> -     write_bytes_to_xdr_buf(xdr->buf, starting_len + 4,  &tmp64, 8);
>>> -     tmp = htonl(*maxcount);
>>> -     write_bytes_to_xdr_buf(xdr->buf, starting_len + 12, &tmp,   4);
>>> -
>>> -     tmp = xdr_zero;
>>> -     write_bytes_to_xdr_buf(xdr->buf, starting_len + 16 + *maxcount, &tmp,
>>> -                            xdr_pad_size(*maxcount));
>>> -     return nfs_ok;
>>> -}
>>> -
>>> -static __be32
>>> -nfsd4_encode_read_plus_hole(struct nfsd4_compoundres *resp,
>>> -                         struct nfsd4_read *read,
>>> -                         unsigned long *maxcount, u32 *eof)
>>> -{
>>> -     struct file *file = read->rd_nf->nf_file;
>>> -     loff_t data_pos = vfs_llseek(file, read->rd_offset, SEEK_DATA);
>>> -     loff_t f_size = i_size_read(file_inode(file));
>>> -     unsigned long count;
>>> -     __be32 *p;
>>> -
>>> -     if (data_pos == -ENXIO)
>>> -             data_pos = f_size;
>>> -     else if (data_pos <= read->rd_offset || (data_pos < f_size && data_pos % PAGE_SIZE))
>>> -             return nfsd4_encode_read_plus_data(resp, read, maxcount, eof, &f_size);
>>> -     count = data_pos - read->rd_offset;
>>> -
>>> -     /* Content type, offset, byte count */
>>> -     p = xdr_reserve_space(resp->xdr, 4 + 8 + 8);
>>> -     if (!p)
>>> -             return nfserr_resource;
>>> -
>>> -     *p++ = htonl(NFS4_CONTENT_HOLE);
>>> +     *p++ = cpu_to_be32(NFS4_CONTENT_DATA);
>>>      p = xdr_encode_hyper(p, read->rd_offset);
>>> -     p = xdr_encode_hyper(p, count);
>>> +     *p = cpu_to_be32(read->rd_length);
>>> 
>>> -     *eof = (read->rd_offset + count) >= f_size;
>>> -     *maxcount = min_t(unsigned long, count, *maxcount);
>>>      return nfs_ok;
>>> }
>>> 
>>> @@ -4811,69 +4769,36 @@ static __be32
>>> nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
>>>                     struct nfsd4_read *read)
>>> {
>>> -     unsigned long maxcount, count;
>>> +     struct file *file = read->rd_nf->nf_file;
>>>      struct xdr_stream *xdr = resp->xdr;
>>> -     struct file *file;
>>>      int starting_len = xdr->buf->len;
>>> -     int last_segment = xdr->buf->len;
>>> -     int segments = 0;
>>> -     __be32 *p, tmp;
>>> -     bool is_data;
>>> -     loff_t pos;
>>> -     u32 eof;
>>> +     u32 segments = 0;
>>> +     __be32 *p;
>>> 
>>>      if (nfserr)
>>>              return nfserr;
>>> -     file = read->rd_nf->nf_file;
>>> 
>>>      /* eof flag, segment count */
>>>      p = xdr_reserve_space(xdr, 4 + 4);
>>>      if (!p)
>>> -             return nfserr_resource;
>>> +             return nfserr_io;
>>>      xdr_commit_encode(xdr);
>>> 
>>> -     maxcount = min_t(unsigned long, read->rd_length,
>>> -                      (xdr->buf->buflen - xdr->buf->len));
>>> -     count    = maxcount;
>>> -
>>> -     eof = read->rd_offset >= i_size_read(file_inode(file));
>>> -     if (eof)
>>> +     read->rd_eof = read->rd_offset >= i_size_read(file_inode(file));
>>> +     if (read->rd_eof)
>>>              goto out;
>>> 
>>> -     pos = vfs_llseek(file, read->rd_offset, SEEK_HOLE);
>>> -     is_data = pos > read->rd_offset;
>>> -
>>> -     while (count > 0 && !eof) {
>>> -             maxcount = count;
>>> -             if (is_data)
>>> -                     nfserr = nfsd4_encode_read_plus_data(resp, read, &maxcount, &eof,
>>> -                                             segments == 0 ? &pos : NULL);
>>> -             else
>>> -                     nfserr = nfsd4_encode_read_plus_hole(resp, read, &maxcount, &eof);
>>> -             if (nfserr)
>>> -                     goto out;
>>> -             count -= maxcount;
>>> -             read->rd_offset += maxcount;
>>> -             is_data = !is_data;
>>> -             last_segment = xdr->buf->len;
>>> -             segments++;
>>> -     }
>>> -
>>> -out:
>>> -     if (nfserr && segments == 0)
>>> +     nfserr = nfsd4_encode_read_plus_data(resp, read);
>>> +     if (nfserr) {
>>>              xdr_truncate_encode(xdr, starting_len);
>>> -     else {
>>> -             if (nfserr) {
>>> -                     xdr_truncate_encode(xdr, last_segment);
>>> -                     nfserr = nfs_ok;
>>> -                     eof = 0;
>>> -             }
>>> -             tmp = htonl(eof);
>>> -             write_bytes_to_xdr_buf(xdr->buf, starting_len,     &tmp, 4);
>>> -             tmp = htonl(segments);
>>> -             write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4);
>>> +             return nfserr;
>>>      }
>>> 
>>> +     segments++;
>>> +
>>> +out:
>>> +     p = xdr_encode_bool(p, read->rd_eof);
>>> +     *p = cpu_to_be32(segments);
>>>      return nfserr;
>>> }
>>> 
>>> --
>>> 2.37.2
>>> 
>> 
>> --
>> Chuck Lever

--
Chuck Lever







[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux