> On Aug 1, 2018, at 10:14 AM, J. Bruce Fields <bfields@xxxxxxxxxxxx> wrote: > > On Fri, Jul 27, 2018 at 11:19:10AM -0400, Chuck Lever wrote: >> I've given up on the idea of zero-copy handling of SYMLINK on the >> server side. This is because the Linux VFS symlink API requires the >> symlink pathname to be in a NUL-terminated kmalloc'd buffer. The >> NUL-termination is going to be problematic (watching out for >> landing on a page boundary and dealing with a 4096-byte pathname). >> >> I don't believe that SYMLINK creation is on a performance path or is >> requested frequently enough that it will cause noticeable CPU cache >> pollution due to data copies. > > Sounds fine. > > The limits here are weird. In the v2 case the maximum length is > NFS_MAXPATHLEN == 1024. OK, I see, I guess that limit was imposed by > the NFSv2 protocol. Right. > In the v3 case it's NFS3_MAXPATHLEN == PATH_MAX == > 4096. But we were imposing an artificial max of 4095 just so we could > fit the result in one page with null termination. > > OK, makes sense to me (though I can't recall anyone actually complaining > about that limit). I haven't heard complaints either. I'm just trying to make this code a little more sensible. :-) > --b. > > >> >> There will be two places where a transport callout will be necessary >> to fill in the rqstp: one will be in the svc_fill_symlink_pathname() >> helper that is used by NFSv2 and NFSv3, and the other will be in >> nfsd4_decode_create(). >> >> Signed-off-by: Chuck Lever <chuck.lever@xxxxxxxxxx> >> --- >> fs/nfsd/nfs3proc.c | 2 + >> fs/nfsd/nfsproc.c | 2 + >> include/linux/sunrpc/svc.h | 3 +- >> net/sunrpc/svc.c | 67 ++++++++++++++++---------------------------- >> 4 files changed, 31 insertions(+), 43 deletions(-) >> >> diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c >> index 8d1c2d1a..9eb8086 100644 >> --- a/fs/nfsd/nfs3proc.c >> +++ b/fs/nfsd/nfs3proc.c >> @@ -290,6 +290,7 @@ >> RETURN_STATUS(nfserr_nametoolong); >> >> argp->tname = svc_fill_symlink_pathname(rqstp, &argp->first, >> + page_address(rqstp->rq_arg.pages[0]), >> argp->tlen); >> if (IS_ERR(argp->tname)) >> RETURN_STATUS(nfserrno(PTR_ERR(argp->tname))); >> @@ -303,6 +304,7 @@ >> fh_init(&resp->fh, NFS3_FHSIZE); >> nfserr = nfsd_symlink(rqstp, &resp->dirfh, argp->fname, argp->flen, >> argp->tname, &resp->fh); >> + kfree(argp->tname); >> RETURN_STATUS(nfserr); >> } >> >> diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c >> index a6faee5..0d20fd1 100644 >> --- a/fs/nfsd/nfsproc.c >> +++ b/fs/nfsd/nfsproc.c >> @@ -454,6 +454,7 @@ >> return nfserr_nametoolong; >> >> argp->tname = svc_fill_symlink_pathname(rqstp, &argp->first, >> + page_address(rqstp->rq_arg.pages[0]), >> argp->tlen); >> if (IS_ERR(argp->tname)) >> return nfserrno(PTR_ERR(argp->tname)); >> @@ -466,6 +467,7 @@ >> nfserr = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen, >> argp->tname, &newfh); >> >> + kfree(argp->tname); >> fh_put(&argp->ffh); >> fh_put(&newfh); >> return nfserr; >> diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h >> index 43f88bd..73e130a 100644 >> --- a/include/linux/sunrpc/svc.h >> +++ b/include/linux/sunrpc/svc.h >> @@ -499,7 +499,8 @@ unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, >> struct page **pages, >> struct kvec *first, size_t total); >> char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, >> - struct kvec *first, size_t total); >> + struct kvec *first, void *p, >> + size_t total); >> >> #define RPC_MAX_ADDRBUFLEN (63U) >> >> diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c >> index 2194ed5..d13e05f 100644 >> --- a/net/sunrpc/svc.c >> +++ b/net/sunrpc/svc.c >> @@ -1577,65 +1577,48 @@ unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, struct page **pages, >> * svc_fill_symlink_pathname - Construct pathname argument for VFS symlink call >> * @rqstp: svc_rqst to operate on >> * @first: buffer containing first section of pathname >> + * @p: buffer containing remaining section of pathname >> * @total: total length of the pathname argument >> * >> - * Returns pointer to a NUL-terminated string, or an ERR_PTR. The buffer is >> - * released automatically when @rqstp is recycled. >> + * The VFS symlink API demands a NUL-terminated pathname in mapped memory. >> + * Returns pointer to a NUL-terminated string, or an ERR_PTR. Caller must free >> + * the returned string. >> */ >> char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, struct kvec *first, >> - size_t total) >> + void *p, size_t total) >> { >> - struct xdr_buf *arg = &rqstp->rq_arg; >> - struct page **pages; >> - char *result; >> - >> - /* VFS API demands a NUL-terminated pathname. This function >> - * uses a page from @rqstp as the pathname buffer, to enable >> - * direct placement. Thus the total buffer size is PAGE_SIZE. >> - * Space in this buffer for NUL-termination requires that we >> - * cap the size of the returned symlink pathname just a >> - * little early. >> - */ >> - if (total > PAGE_SIZE - 1) >> - return ERR_PTR(-ENAMETOOLONG); >> + size_t len, remaining; >> + char *result, *dst; >> >> - /* Some types of transport can present the pathname entirely >> - * in rq_arg.pages. If not, then copy the pathname into one >> - * page. >> - */ >> - pages = arg->pages; >> - WARN_ON_ONCE(arg->page_base != 0); >> - if (first->iov_base == 0) { >> - result = page_address(*pages); >> - result[total] = '\0'; >> - } else { >> - size_t len, remaining; >> - char *dst; >> + result = kmalloc(total + 1, GFP_KERNEL); >> + if (!result) >> + return ERR_PTR(-ESERVERFAULT); >> >> - result = page_address(*(rqstp->rq_next_page++)); >> - dst = result; >> - remaining = total; >> + dst = result; >> + remaining = total; >> >> - len = min_t(size_t, total, first->iov_len); >> + len = min_t(size_t, total, first->iov_len); >> + if (len) { >> memcpy(dst, first->iov_base, len); >> dst += len; >> remaining -= len; >> + } >> >> - /* No more than one page left */ >> - if (remaining) { >> - len = min_t(size_t, remaining, PAGE_SIZE); >> - memcpy(dst, page_address(*pages), len); >> - dst += len; >> - } >> - >> - *dst = '\0'; >> + if (remaining) { >> + len = min_t(size_t, remaining, PAGE_SIZE); >> + memcpy(dst, p, len); >> + dst += len; >> } >> >> - /* Sanity check: we don't allow the pathname argument to >> + *dst = '\0'; >> + >> + /* Sanity check: Linux doesn't allow the pathname argument to >> * contain a NUL byte. >> */ >> - if (strlen(result) != total) >> + if (strlen(result) != total) { >> + kfree(result); >> return ERR_PTR(-EINVAL); >> + } >> return result; >> } >> EXPORT_SYMBOL_GPL(svc_fill_symlink_pathname); -- Chuck Lever -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html