> On Sep 7, 2022, at 4:37 PM, Anna Schumaker <anna@xxxxxxxxxx> wrote: > > On Wed, Sep 7, 2022 at 4:29 PM Chuck Lever III <chuck.lever@xxxxxxxxxx> wrote: >> >> Be sure to Cc: Jeff on these. Thanks! >> >> >>> On Sep 7, 2022, at 3:52 PM, Anna Schumaker <anna@xxxxxxxxxx> wrote: >>> >>> From: Anna Schumaker <Anna.Schumaker@xxxxxxxxxx> >>> >>> Chuck had suggested reverting READ_PLUS so it returns a single DATA >>> segment covering the requested read range. This prepares the server for >>> a future "sparse read" function so support can easily be added without >>> needing to rip out the old READ_PLUS code at the same time. >>> >>> Signed-off-by: Anna Schumaker <Anna.Schumaker@xxxxxxxxxx> >>> --- >>> fs/nfsd/nfs4xdr.c | 139 +++++++++++----------------------------------- >>> 1 file changed, 32 insertions(+), 107 deletions(-) >>> >>> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c >>> index 1e9690a061ec..bcc8c385faf2 100644 >>> --- a/fs/nfsd/nfs4xdr.c >>> +++ b/fs/nfsd/nfs4xdr.c >>> @@ -4731,79 +4731,37 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr, >>> >>> static __be32 >>> nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp, >>> - struct nfsd4_read *read, >>> - unsigned long *maxcount, u32 *eof, >>> - loff_t *pos) >>> + struct nfsd4_read *read) >>> { >>> - struct xdr_stream *xdr = resp->xdr; >>> + bool splice_ok = test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags); >>> struct file *file = read->rd_nf->nf_file; >>> - int starting_len = xdr->buf->len; >>> - loff_t hole_pos; >>> - __be32 nfserr; >>> - __be32 *p, tmp; >>> - __be64 tmp64; >>> - >>> - hole_pos = pos ? *pos : vfs_llseek(file, read->rd_offset, SEEK_HOLE); >>> - if (hole_pos > read->rd_offset) >>> - *maxcount = min_t(unsigned long, *maxcount, hole_pos - read->rd_offset); >>> - *maxcount = min_t(unsigned long, *maxcount, (xdr->buf->buflen - xdr->buf->len)); >>> + struct xdr_stream *xdr = resp->xdr; >>> + unsigned long maxcount; >>> + __be32 nfserr, *p; >>> >>> /* Content type, offset, byte count */ >>> p = xdr_reserve_space(xdr, 4 + 8 + 4); >>> if (!p) >>> - return nfserr_resource; >>> + return nfserr_io; >> >> Wouldn't nfserr_rep_too_big be a more appropriate status for running >> off the end of the send buffer? I'm not 100% sure, but I would expect >> that exhausting send buffer space would imply the reply has grown too >> large. > > I can switch it to that, no problem. OK, never mind. I see that nfsd4_encode_compound() handles the status code conversion for every encoder, and deals with reply caching too: 5349 if (op->status == nfserr_resource && nfsd4_has_session(&resp->cstate)) { 5350 struct nfsd4_slot *slot = resp->cstate.slot; 5351 5352 if (slot->sl_flags & NFSD4_SLOT_CACHETHIS) 5353 op->status = nfserr_rep_too_big_to_cache; 5354 else 5355 op->status = nfserr_rep_too_big; 5356 } So returning nfserr_resource from the READ_PLUS encoder when xdr_reserve_space() fails is copacetic and preferred. Then, once READ_PLUS can return multiple segments again, it should deal with send buffer space exhaustion by truncating the reply at the last properly encoded segment, as Trond suggested. >>> + if (resp->xdr->buf->page_len && splice_ok) { >>> + WARN_ON_ONCE(splice_ok); >>> + return nfserr_io; >>> + } >> >> I wish I understood why this test was needed. It seems to have been >> copied and pasted from historic code into nfsd4_encode_read(), and >> there have been recent mechanical changes to it, but there's no >> comment explaining it there... > > Yeah, I saw this was in the read code and assumed it was an important > check so I added it here too. >> >> In any event, this seems to be checking for a server software bug, >> so maybe this should return nfserr_serverfault. Oddly that status >> code isn't defined yet. > > Do you want me to add that code and return it in this patch? I would still like to return serverfault if the splice check fails. >> Do you have some performance results for v2? > > Not yet, I have it running now so hopefully I'll have something ready > by tomorrow morning. > > Anna >> >> >>> - read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, *maxcount); >>> - if (read->rd_vlen < 0) >>> - return nfserr_resource; >>> + maxcount = min_t(unsigned long, read->rd_length, >>> + (xdr->buf->buflen - xdr->buf->len)); >>> >>> - nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset, >>> - resp->rqstp->rq_vec, read->rd_vlen, maxcount, eof); >>> + if (file->f_op->splice_read && splice_ok) >>> + nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount); >>> + else >>> + nfserr = nfsd4_encode_readv(resp, read, file, maxcount); >>> if (nfserr) >>> return nfserr; >>> - xdr_truncate_encode(xdr, starting_len + 16 + xdr_align_size(*maxcount)); >>> >>> - tmp = htonl(NFS4_CONTENT_DATA); >>> - write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4); >>> - tmp64 = cpu_to_be64(read->rd_offset); >>> - write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp64, 8); >>> - tmp = htonl(*maxcount); >>> - write_bytes_to_xdr_buf(xdr->buf, starting_len + 12, &tmp, 4); >>> - >>> - tmp = xdr_zero; >>> - write_bytes_to_xdr_buf(xdr->buf, starting_len + 16 + *maxcount, &tmp, >>> - xdr_pad_size(*maxcount)); >>> - return nfs_ok; >>> -} >>> - >>> -static __be32 >>> -nfsd4_encode_read_plus_hole(struct nfsd4_compoundres *resp, >>> - struct nfsd4_read *read, >>> - unsigned long *maxcount, u32 *eof) >>> -{ >>> - struct file *file = read->rd_nf->nf_file; >>> - loff_t data_pos = vfs_llseek(file, read->rd_offset, SEEK_DATA); >>> - loff_t f_size = i_size_read(file_inode(file)); >>> - unsigned long count; >>> - __be32 *p; >>> - >>> - if (data_pos == -ENXIO) >>> - data_pos = f_size; >>> - else if (data_pos <= read->rd_offset || (data_pos < f_size && data_pos % PAGE_SIZE)) >>> - return nfsd4_encode_read_plus_data(resp, read, maxcount, eof, &f_size); >>> - count = data_pos - read->rd_offset; >>> - >>> - /* Content type, offset, byte count */ >>> - p = xdr_reserve_space(resp->xdr, 4 + 8 + 8); >>> - if (!p) >>> - return nfserr_resource; >>> - >>> - *p++ = htonl(NFS4_CONTENT_HOLE); >>> + *p++ = cpu_to_be32(NFS4_CONTENT_DATA); >>> p = xdr_encode_hyper(p, read->rd_offset); >>> - p = xdr_encode_hyper(p, count); >>> + *p = cpu_to_be32(read->rd_length); >>> >>> - *eof = (read->rd_offset + count) >= f_size; >>> - *maxcount = min_t(unsigned long, count, *maxcount); >>> return nfs_ok; >>> } >>> >>> @@ -4811,69 +4769,36 @@ static __be32 >>> nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr, >>> struct nfsd4_read *read) >>> { >>> - unsigned long maxcount, count; >>> + struct file *file = read->rd_nf->nf_file; >>> struct xdr_stream *xdr = resp->xdr; >>> - struct file *file; >>> int starting_len = xdr->buf->len; >>> - int last_segment = xdr->buf->len; >>> - int segments = 0; >>> - __be32 *p, tmp; >>> - bool is_data; >>> - loff_t pos; >>> - u32 eof; >>> + u32 segments = 0; >>> + __be32 *p; >>> >>> if (nfserr) >>> return nfserr; >>> - file = read->rd_nf->nf_file; >>> >>> /* eof flag, segment count */ >>> p = xdr_reserve_space(xdr, 4 + 4); >>> if (!p) >>> - return nfserr_resource; >>> + return nfserr_io; >>> xdr_commit_encode(xdr); >>> >>> - maxcount = min_t(unsigned long, read->rd_length, >>> - (xdr->buf->buflen - xdr->buf->len)); >>> - count = maxcount; >>> - >>> - eof = read->rd_offset >= i_size_read(file_inode(file)); >>> - if (eof) >>> + read->rd_eof = read->rd_offset >= i_size_read(file_inode(file)); >>> + if (read->rd_eof) >>> goto out; >>> >>> - pos = vfs_llseek(file, read->rd_offset, SEEK_HOLE); >>> - is_data = pos > read->rd_offset; >>> - >>> - while (count > 0 && !eof) { >>> - maxcount = count; >>> - if (is_data) >>> - nfserr = nfsd4_encode_read_plus_data(resp, read, &maxcount, &eof, >>> - segments == 0 ? &pos : NULL); >>> - else >>> - nfserr = nfsd4_encode_read_plus_hole(resp, read, &maxcount, &eof); >>> - if (nfserr) >>> - goto out; >>> - count -= maxcount; >>> - read->rd_offset += maxcount; >>> - is_data = !is_data; >>> - last_segment = xdr->buf->len; >>> - segments++; >>> - } >>> - >>> -out: >>> - if (nfserr && segments == 0) >>> + nfserr = nfsd4_encode_read_plus_data(resp, read); >>> + if (nfserr) { >>> xdr_truncate_encode(xdr, starting_len); >>> - else { >>> - if (nfserr) { >>> - xdr_truncate_encode(xdr, last_segment); >>> - nfserr = nfs_ok; >>> - eof = 0; >>> - } >>> - tmp = htonl(eof); >>> - write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4); >>> - tmp = htonl(segments); >>> - write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4); >>> + return nfserr; >>> } >>> >>> + segments++; >>> + >>> +out: >>> + p = xdr_encode_bool(p, read->rd_eof); >>> + *p = cpu_to_be32(segments); >>> return nfserr; >>> } >>> >>> -- >>> 2.37.2 >>> >> >> -- >> Chuck Lever -- Chuck Lever