On Fri, Jul 27, 2018 at 11:19:10AM -0400, Chuck Lever wrote: > I've given up on the idea of zero-copy handling of SYMLINK on the > server side. This is because the Linux VFS symlink API requires the > symlink pathname to be in a NUL-terminated kmalloc'd buffer. The > NUL-termination is going to be problematic (watching out for > landing on a page boundary and dealing with a 4096-byte pathname). > > I don't believe that SYMLINK creation is on a performance path or is > requested frequently enough that it will cause noticeable CPU cache > pollution due to data copies. Sounds fine. The limits here are weird. In the v2 case the maximum length is NFS_MAXPATHLEN == 1024. OK, I see, I guess that limit was imposed by the NFSv2 protocol. In the v3 case it's NFS3_MAXPATHLEN == PATH_MAX == 4096. But we were imposing an artificial max of 4095 just so we could fit the result in one page with null termination. OK, makes sense to me (though I can't recall anyone actually complaining about that limit). --b. > > There will be two places where a transport callout will be necessary > to fill in the rqstp: one will be in the svc_fill_symlink_pathname() > helper that is used by NFSv2 and NFSv3, and the other will be in > nfsd4_decode_create(). > > Signed-off-by: Chuck Lever <chuck.lever@xxxxxxxxxx> > --- > fs/nfsd/nfs3proc.c | 2 + > fs/nfsd/nfsproc.c | 2 + > include/linux/sunrpc/svc.h | 3 +- > net/sunrpc/svc.c | 67 ++++++++++++++++---------------------------- > 4 files changed, 31 insertions(+), 43 deletions(-) > > diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c > index 8d1c2d1a..9eb8086 100644 > --- a/fs/nfsd/nfs3proc.c > +++ b/fs/nfsd/nfs3proc.c > @@ -290,6 +290,7 @@ > RETURN_STATUS(nfserr_nametoolong); > > argp->tname = svc_fill_symlink_pathname(rqstp, &argp->first, > + page_address(rqstp->rq_arg.pages[0]), > argp->tlen); > if (IS_ERR(argp->tname)) > RETURN_STATUS(nfserrno(PTR_ERR(argp->tname))); > @@ -303,6 +304,7 @@ > fh_init(&resp->fh, NFS3_FHSIZE); > nfserr = nfsd_symlink(rqstp, &resp->dirfh, argp->fname, argp->flen, > argp->tname, &resp->fh); > + kfree(argp->tname); > RETURN_STATUS(nfserr); > } > > diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c > index a6faee5..0d20fd1 100644 > --- a/fs/nfsd/nfsproc.c > +++ b/fs/nfsd/nfsproc.c > @@ -454,6 +454,7 @@ > return nfserr_nametoolong; > > argp->tname = svc_fill_symlink_pathname(rqstp, &argp->first, > + page_address(rqstp->rq_arg.pages[0]), > argp->tlen); > if (IS_ERR(argp->tname)) > return nfserrno(PTR_ERR(argp->tname)); > @@ -466,6 +467,7 @@ > nfserr = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen, > argp->tname, &newfh); > > + kfree(argp->tname); > fh_put(&argp->ffh); > fh_put(&newfh); > return nfserr; > diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h > index 43f88bd..73e130a 100644 > --- a/include/linux/sunrpc/svc.h > +++ b/include/linux/sunrpc/svc.h > @@ -499,7 +499,8 @@ unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, > struct page **pages, > struct kvec *first, size_t total); > char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, > - struct kvec *first, size_t total); > + struct kvec *first, void *p, > + size_t total); > > #define RPC_MAX_ADDRBUFLEN (63U) > > diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c > index 2194ed5..d13e05f 100644 > --- a/net/sunrpc/svc.c > +++ b/net/sunrpc/svc.c > @@ -1577,65 +1577,48 @@ unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, struct page **pages, > * svc_fill_symlink_pathname - Construct pathname argument for VFS symlink call > * @rqstp: svc_rqst to operate on > * @first: buffer containing first section of pathname > + * @p: buffer containing remaining section of pathname > * @total: total length of the pathname argument > * > - * Returns pointer to a NUL-terminated string, or an ERR_PTR. The buffer is > - * released automatically when @rqstp is recycled. > + * The VFS symlink API demands a NUL-terminated pathname in mapped memory. > + * Returns pointer to a NUL-terminated string, or an ERR_PTR. Caller must free > + * the returned string. > */ > char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, struct kvec *first, > - size_t total) > + void *p, size_t total) > { > - struct xdr_buf *arg = &rqstp->rq_arg; > - struct page **pages; > - char *result; > - > - /* VFS API demands a NUL-terminated pathname. This function > - * uses a page from @rqstp as the pathname buffer, to enable > - * direct placement. Thus the total buffer size is PAGE_SIZE. > - * Space in this buffer for NUL-termination requires that we > - * cap the size of the returned symlink pathname just a > - * little early. > - */ > - if (total > PAGE_SIZE - 1) > - return ERR_PTR(-ENAMETOOLONG); > + size_t len, remaining; > + char *result, *dst; > > - /* Some types of transport can present the pathname entirely > - * in rq_arg.pages. If not, then copy the pathname into one > - * page. > - */ > - pages = arg->pages; > - WARN_ON_ONCE(arg->page_base != 0); > - if (first->iov_base == 0) { > - result = page_address(*pages); > - result[total] = '\0'; > - } else { > - size_t len, remaining; > - char *dst; > + result = kmalloc(total + 1, GFP_KERNEL); > + if (!result) > + return ERR_PTR(-ESERVERFAULT); > > - result = page_address(*(rqstp->rq_next_page++)); > - dst = result; > - remaining = total; > + dst = result; > + remaining = total; > > - len = min_t(size_t, total, first->iov_len); > + len = min_t(size_t, total, first->iov_len); > + if (len) { > memcpy(dst, first->iov_base, len); > dst += len; > remaining -= len; > + } > > - /* No more than one page left */ > - if (remaining) { > - len = min_t(size_t, remaining, PAGE_SIZE); > - memcpy(dst, page_address(*pages), len); > - dst += len; > - } > - > - *dst = '\0'; > + if (remaining) { > + len = min_t(size_t, remaining, PAGE_SIZE); > + memcpy(dst, p, len); > + dst += len; > } > > - /* Sanity check: we don't allow the pathname argument to > + *dst = '\0'; > + > + /* Sanity check: Linux doesn't allow the pathname argument to > * contain a NUL byte. > */ > - if (strlen(result) != total) > + if (strlen(result) != total) { > + kfree(result); > return ERR_PTR(-EINVAL); > + } > return result; > } > EXPORT_SYMBOL_GPL(svc_fill_symlink_pathname); -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html