> On Dec 21, 2017, at 12:43 PM, Chuck Lever <chuck.lever@xxxxxxxxxx> wrote: > > Move common code in NFSD's symlink arg decoders into a helper. The > immediate benefits include: > > - one fewer data copies on transports that support DDP > - no memory allocation in the NFSv4 XDR decoder > - consistent error checking across all versions > - reduction of code duplication > - support for both legal forms of SYMLINK requests on RDMA > transports for all versions of NFS (in particular, NFSv2, for > completeness) > > In the long term, this helper is an appropriate spot to perform a > per-transport call-out to fill the pathname argument using, say, > RDMA Reads. > > Filling the pathname in the proc function also means that eventually > the incoming filehandle can be interpreted so that filesystem- > specific memory can be allocated as a sink for the pathname > argument, rather than using anonymous pages. > > Wondering why the current code punts a zero-length SYMLINK. Is it > illegal to create a zero-length SYMLINK on Linux? > > Signed-off-by: Chuck Lever <chuck.lever@xxxxxxxxxx> > --- > fs/nfsd/nfs3proc.c | 10 +++++++ > fs/nfsd/nfs3xdr.c | 51 ++++++++------------------------- > fs/nfsd/nfs4proc.c | 7 +++++ > fs/nfsd/nfs4xdr.c | 10 +++++-- > fs/nfsd/nfsproc.c | 14 +++++---- > fs/nfsd/nfsxdr.c | 49 +++++++++++++++++++------------- > fs/nfsd/xdr.h | 1 + > fs/nfsd/xdr3.h | 1 + > fs/nfsd/xdr4.h | 2 + > include/linux/sunrpc/svc.h | 2 + > net/sunrpc/svc.c | 67 ++++++++++++++++++++++++++++++++++++++++++++ > 11 files changed, 146 insertions(+), 68 deletions(-) > > diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c > index 2dd95eb..38b0cf1 100644 > --- a/fs/nfsd/nfs3proc.c > +++ b/fs/nfsd/nfs3proc.c > @@ -283,6 +283,16 @@ > struct nfsd3_diropres *resp = rqstp->rq_resp; > __be32 nfserr; > > + if (argp->tlen == 0) > + RETURN_STATUS(nfserr_inval); > + if (argp->tlen > NFS3_MAXPATHLEN) > + RETURN_STATUS(nfserr_nametoolong); > + > + argp->tname = svc_fill_symlink_pathname(rqstp, &argp->first, > + argp->tlen); > + if (IS_ERR(argp->tname)) > + RETURN_STATUS(PTR_ERR(argp->tname)); The 0day robot has rightly pointed out a problem here. svc_fill_symlink_pathname() can return a negative errno hidden in a pointer, but the RETURN_STATUS macro expects a be32 integer NFS status. Not at all the same. I'll post a corrected version of this patch soon. > + > dprintk("nfsd: SYMLINK(3) %s %.*s -> %.*s\n", > SVCFH_fmt(&argp->ffh), > argp->flen, argp->fname, > diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c > index 240cdb0e..78b555b 100644 > --- a/fs/nfsd/nfs3xdr.c > +++ b/fs/nfsd/nfs3xdr.c > @@ -452,51 +452,24 @@ void fill_post_wcc(struct svc_fh *fhp) > nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p) > { > struct nfsd3_symlinkargs *args = rqstp->rq_argp; > - unsigned int len, avail; > - char *old, *new; > - struct kvec *vec; > + char *base = (char *)p; > + size_t dlen; > > if (!(p = decode_fh(p, &args->ffh)) || > - !(p = decode_filename(p, &args->fname, &args->flen)) > - ) > + !(p = decode_filename(p, &args->fname, &args->flen))) > return 0; > p = decode_sattr3(p, &args->attrs); > > - /* now decode the pathname, which might be larger than the first page. > - * As we have to check for nul's anyway, we copy it into a new page > - * This page appears in the rq_res.pages list, but as pages_len is always > - * 0, it won't get in the way > - */ > - len = ntohl(*p++); > - if (len == 0 || len > NFS3_MAXPATHLEN || len >= PAGE_SIZE) > - return 0; > - args->tname = new = page_address(*(rqstp->rq_next_page++)); > - args->tlen = len; > - /* first copy and check from the first page */ > - old = (char*)p; > - vec = &rqstp->rq_arg.head[0]; > - if ((void *)old > vec->iov_base + vec->iov_len) > - return 0; > - avail = vec->iov_len - (old - (char*)vec->iov_base); > - while (len && avail && *old) { > - *new++ = *old++; > - len--; > - avail--; > - } > - /* now copy next page if there is one */ > - if (len && !avail && rqstp->rq_arg.page_len) { > - avail = min_t(unsigned int, rqstp->rq_arg.page_len, PAGE_SIZE); > - old = page_address(rqstp->rq_arg.pages[0]); > - } > - while (len && avail && *old) { > - *new++ = *old++; > - len--; > - avail--; > - } > - *new = '\0'; > - if (len) > - return 0; > + args->tlen = ntohl(*p++); > > + args->first.iov_base = p; > + args->first.iov_len = rqstp->rq_arg.head[0].iov_len; > + args->first.iov_len -= (char *)p - base; > + > + dlen = args->first.iov_len + rqstp->rq_arg.page_len + > + rqstp->rq_arg.tail[0].iov_len; > + if (dlen < XDR_QUADLEN(args->tlen) << 2) > + return 0; > return 1; > } > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c > index 5029b96..36bd1f7 100644 > --- a/fs/nfsd/nfs4proc.c > +++ b/fs/nfsd/nfs4proc.c > @@ -605,6 +605,13 @@ static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net) > > switch (create->cr_type) { > case NF4LNK: > + if (create->cr_datalen > NFS4_MAXPATHLEN) > + return nfserr_nametoolong; > + create->cr_data = > + svc_fill_symlink_pathname(rqstp, &create->cr_first, > + create->cr_datalen); > + if (IS_ERR(create->cr_data)) > + return nfserrno(PTR_ERR(create->cr_data)); > status = nfsd_symlink(rqstp, &cstate->current_fh, > create->cr_name, create->cr_namelen, > create->cr_data, &resfh); > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c > index bd25230..d05384e 100644 > --- a/fs/nfsd/nfs4xdr.c > +++ b/fs/nfsd/nfs4xdr.c > @@ -648,6 +648,7 @@ static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, > static __be32 > nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create) > { > + struct kvec *head; > DECODE_HEAD; > > READ_BUF(4); > @@ -656,10 +657,13 @@ static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, > case NF4LNK: > READ_BUF(4); > create->cr_datalen = be32_to_cpup(p++); > + if (create->cr_datalen == 0) > + return nfserr_inval; > + head = argp->rqstp->rq_arg.head; > + create->cr_first.iov_base = p; > + create->cr_first.iov_len = head->iov_len; > + create->cr_first.iov_len -= (char *)p - (char *)head->iov_base; > READ_BUF(create->cr_datalen); > - create->cr_data = svcxdr_dupstr(argp, p, create->cr_datalen); > - if (!create->cr_data) > - return nfserr_jukebox; > break; > case NF4BLK: > case NF4CHR: > diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c > index 1995ea6..f107f9f 100644 > --- a/fs/nfsd/nfsproc.c > +++ b/fs/nfsd/nfsproc.c > @@ -449,17 +449,19 @@ > struct svc_fh newfh; > __be32 nfserr; > > + if (argp->tlen > NFS_MAXPATHLEN) > + return nfserr_nametoolong; > + > + argp->tname = svc_fill_symlink_pathname(rqstp, &argp->first, > + argp->tlen); > + if (IS_ERR(argp->tname)) > + return nfserrno(PTR_ERR(argp->tname)); > + > dprintk("nfsd: SYMLINK %s %.*s -> %.*s\n", > SVCFH_fmt(&argp->ffh), argp->flen, argp->fname, > argp->tlen, argp->tname); > > fh_init(&newfh, NFS_FHSIZE); > - /* > - * Crazy hack: the request fits in a page, and already-decoded > - * attributes follow argp->tname, so it's safe to just write a > - * null to ensure it's null-terminated: > - */ > - argp->tname[argp->tlen] = '\0'; > nfserr = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen, > argp->tname, &newfh); > > diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c > index 165e25e..8fcd047 100644 > --- a/fs/nfsd/nfsxdr.c > +++ b/fs/nfsd/nfsxdr.c > @@ -71,22 +71,6 @@ __be32 *nfs2svc_decode_fh(__be32 *p, struct svc_fh *fhp) > } > > static __be32 * > -decode_pathname(__be32 *p, char **namp, unsigned int *lenp) > -{ > - char *name; > - unsigned int i; > - > - if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS_MAXPATHLEN)) != NULL) { > - for (i = 0, name = *namp; i < *lenp; i++, name++) { > - if (*name == '\0') > - return NULL; > - } > - } > - > - return p; > -} > - > -static __be32 * > decode_sattr(__be32 *p, struct iattr *iap) > { > u32 tmp, tmp1; > @@ -383,14 +367,39 @@ __be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *f > nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p) > { > struct nfsd_symlinkargs *args = rqstp->rq_argp; > + char *base = (char *)p; > + size_t xdrlen; > > if ( !(p = decode_fh(p, &args->ffh)) > - || !(p = decode_filename(p, &args->fname, &args->flen)) > - || !(p = decode_pathname(p, &args->tname, &args->tlen))) > + || !(p = decode_filename(p, &args->fname, &args->flen))) > return 0; > - p = decode_sattr(p, &args->attrs); > > - return xdr_argsize_check(rqstp, p); > + args->tlen = ntohl(*p++); > + if (args->tlen == 0) > + return 0; > + > + args->first.iov_base = p; > + args->first.iov_len = rqstp->rq_arg.head[0].iov_len; > + args->first.iov_len -= (char *)p - base; > + > + /* This request is never larger than a page. Therefore, > + * transport will deliver either: > + * 1. pathname in the pagelist -> sattr is in the tail. > + * 2. everything in the head buffer -> sattr is in the head. > + */ > + if (rqstp->rq_arg.page_len) { > + if (args->tlen != rqstp->rq_arg.page_len) > + return 0; > + p = rqstp->rq_arg.tail[0].iov_base; > + } else { > + xdrlen = XDR_QUADLEN(args->tlen); > + if (xdrlen > args->first.iov_len - (8 * sizeof(__be32))) > + return 0; > + p += xdrlen; > + } > + decode_sattr(p, &args->attrs); > + > + return 1; > } > > int > diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h > index a765c41..ea7cca3 100644 > --- a/fs/nfsd/xdr.h > +++ b/fs/nfsd/xdr.h > @@ -72,6 +72,7 @@ struct nfsd_symlinkargs { > char * tname; > unsigned int tlen; > struct iattr attrs; > + struct kvec first; > }; > > struct nfsd_readdirargs { > diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h > index deccf7f..2cb29e9 100644 > --- a/fs/nfsd/xdr3.h > +++ b/fs/nfsd/xdr3.h > @@ -90,6 +90,7 @@ struct nfsd3_symlinkargs { > char * tname; > unsigned int tlen; > struct iattr attrs; > + struct kvec first; > }; > > struct nfsd3_readdirargs { > diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h > index d56219d..b485cd1 100644 > --- a/fs/nfsd/xdr4.h > +++ b/fs/nfsd/xdr4.h > @@ -110,6 +110,7 @@ struct nfsd4_create { > struct { > u32 datalen; > char *data; > + struct kvec first; > } link; /* NF4LNK */ > struct { > u32 specdata1; > @@ -124,6 +125,7 @@ struct nfsd4_create { > }; > #define cr_datalen u.link.datalen > #define cr_data u.link.data > +#define cr_first u.link.first > #define cr_specdata1 u.dev.specdata1 > #define cr_specdata2 u.dev.specdata2 > > diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h > index 238b9ae..fd5846e 100644 > --- a/include/linux/sunrpc/svc.h > +++ b/include/linux/sunrpc/svc.h > @@ -495,6 +495,8 @@ int svc_register(const struct svc_serv *, struct net *, const int, > char * svc_print_addr(struct svc_rqst *, char *, size_t); > unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, > struct kvec *first, size_t total); > +char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, > + struct kvec *first, size_t total); > > #define RPC_MAX_ADDRBUFLEN (63U) > > diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c > index 759b668..fc93406 100644 > --- a/net/sunrpc/svc.c > +++ b/net/sunrpc/svc.c > @@ -1578,3 +1578,70 @@ unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, struct kvec *first, > return i; > } > EXPORT_SYMBOL_GPL(svc_fill_write_vector); > + > +/** > + * svc_fill_symlink_pathname - Construct pathname argument for VFS symlink call > + * @rqstp: svc_rqst to operate on > + * @first: buffer containing first section of pathname > + * @total: total length of the pathname argument > + * > + * Returns pointer to a NUL-terminated string, or an ERR_PTR. The buffer is > + * released automatically when @rqstp is recycled. > + */ > +char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, struct kvec *first, > + size_t total) > +{ > + struct xdr_buf *arg = &rqstp->rq_arg; > + struct page **pages; > + char *result; > + > + /* VFS API demands a NUL-terminated pathname. This function > + * uses a page from @rqstp as the pathname buffer, to enable > + * direct placement. Thus the total buffer size is PAGE_SIZE. > + * Space in this buffer for NUL-termination requires that we > + * cap the size of the returned symlink pathname just a > + * little early. > + */ > + if (total > PAGE_SIZE - 1) > + return ERR_PTR(-ENAMETOOLONG); > + > + /* Some types of transport can present the pathname entirely > + * in rq_arg.pages. If not, then copy the pathname into one > + * page. > + */ > + pages = arg->pages; > + WARN_ON_ONCE(arg->page_base != 0); > + if (first->iov_base == 0) { > + result = page_address(*pages); > + result[total] = '\0'; > + } else { > + size_t len, remaining; > + char *dst; > + > + result = page_address(*(rqstp->rq_next_page++)); > + dst = result; > + remaining = total; > + > + len = min_t(size_t, total, first->iov_len); > + memcpy(dst, first->iov_base, len); > + dst += len; > + remaining -= len; > + > + /* No more than one page left */ > + if (remaining) { > + len = min_t(size_t, remaining, PAGE_SIZE); > + memcpy(dst, page_address(*pages), len); > + dst += len; > + } > + > + *dst = '\0'; > + } > + > + /* Sanity check: we don't allow the pathname argument to > + * contain a NUL byte. > + */ > + if (strlen(result) != total) > + return ERR_PTR(-EINVAL); > + return result; > +} > +EXPORT_SYMBOL_GPL(svc_fill_symlink_pathname); > > -- > To unsubscribe from this list: send the line "unsubscribe linux-rdma" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html -- Chuck Lever -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html