David Howells <dhowells@xxxxxxxxxx> wrote: > Chuck Lever III <chuck.lever@xxxxxxxxxx> wrote: > > > Simply replacing the kernel_sendpage() loop would be a > > straightforward change and easy to evaluate and test, and > > I'd welcome that without hesitation. > > How about the attached for a first phase? > > It does three sendmsgs, one for the marker + header, one for the body and one > for the tail. ... And this as a second phase. David --- sunrpc: Allow xdr->bvec[] to be extended to do a single sendmsg Allow xdr->bvec[] to be extended and insert the marker, the header and the tail into it so that a single sendmsg() can be used to transmit the message. I wonder if it would be possible to insert the marker at the beginning of the head buffer. Signed-off-by: David Howells <dhowells@xxxxxxxxxx> cc: Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx> cc: Anna Schumaker <anna@xxxxxxxxxx> cc: Chuck Lever <chuck.lever@xxxxxxxxxx> cc: Jeff Layton <jlayton@xxxxxxxxxx> cc: "David S. Miller" <davem@xxxxxxxxxxxxx> cc: Eric Dumazet <edumazet@xxxxxxxxxx> cc: Jakub Kicinski <kuba@xxxxxxxxxx> cc: Paolo Abeni <pabeni@xxxxxxxxxx> cc: Jens Axboe <axboe@xxxxxxxxx> cc: Matthew Wilcox <willy@xxxxxxxxxxxxx> cc: linux-nfs@xxxxxxxxxxxxxxx cc: netdev@xxxxxxxxxxxxxxx --- include/linux/sunrpc/xdr.h | 2 - net/sunrpc/svcsock.c | 46 ++++++++++++++------------------------------- net/sunrpc/xdr.c | 19 ++++++++++-------- net/sunrpc/xprtsock.c | 6 ++--- 4 files changed, 30 insertions(+), 43 deletions(-) diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 72014c9216fc..c74ea483228b 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -137,7 +137,7 @@ void xdr_inline_pages(struct xdr_buf *, unsigned int, struct page **, unsigned int, unsigned int); void xdr_terminate_string(const struct xdr_buf *, const u32); size_t xdr_buf_pagecount(const struct xdr_buf *buf); -int xdr_alloc_bvec(struct xdr_buf *buf, gfp_t gfp); +int xdr_alloc_bvec(struct xdr_buf *buf, gfp_t 
gfp, unsigned int head, unsigned int tail); void xdr_free_bvec(struct xdr_buf *buf); static inline __be32 *xdr_encode_array(__be32 *p, const void *s, unsigned int len) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 14efcc08c6f8..e55761fe1ccf 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -569,7 +569,7 @@ static int svc_udp_sendto(struct svc_rqst *rqstp) if (svc_xprt_is_dead(xprt)) goto out_notconn; - err = xdr_alloc_bvec(xdr, GFP_KERNEL); + err = xdr_alloc_bvec(xdr, GFP_KERNEL, 0, 0); if (err < 0) goto out_unlock; @@ -1073,45 +1073,29 @@ static int svc_tcp_sendmsg(struct socket *sock, struct xdr_buf *xdr, { const struct kvec *head = xdr->head; const struct kvec *tail = xdr->tail; - struct kvec kv[2]; - struct msghdr msg = { .msg_flags = MSG_SPLICE_PAGES | MSG_MORE, }; - size_t sent; + struct msghdr msg = { .msg_flags = MSG_SPLICE_PAGES, }; + size_t n; int ret; *sentp = 0; - ret = xdr_alloc_bvec(xdr, GFP_KERNEL); + ret = xdr_alloc_bvec(xdr, GFP_KERNEL, 2, 1); if (ret < 0) return ret; - kv[0].iov_base = &marker; - kv[0].iov_len = sizeof(marker); - kv[1] = *head; - iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, kv, 2, sizeof(marker) + head->iov_len); + n = 2 + xdr_buf_pagecount(xdr); + bvec_set_virt(&xdr->bvec[0], &marker, sizeof(marker)); + bvec_set_virt(&xdr->bvec[1], head->iov_base, head->iov_len); + bvec_set_virt(&xdr->bvec[n], tail->iov_base, tail->iov_len); + if (tail->iov_len) + n++; + iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, xdr->bvec, n, + sizeof(marker) + xdr->len); ret = sock_sendmsg(sock, &msg); if (ret < 0) return ret; - sent = ret; - - if (!tail->iov_len) - msg.msg_flags &= ~MSG_MORE; - iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, xdr->bvec, - xdr_buf_pagecount(xdr), xdr->page_len); - ret = sock_sendmsg(sock, &msg); - if (ret < 0) - 
return ret; - sent += ret; - } - if (sent > 0) - *sentp = sent; - if (sent != sizeof(marker) + xdr->len) + if (ret > 0) + *sentp = ret; + if (ret != sizeof(marker) + xdr->len) return -EAGAIN; return 0; } diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 36835b2f5446..695821963849 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -141,18 +141,21 @@ size_t xdr_buf_pagecount(const struct xdr_buf *buf) } int -xdr_alloc_bvec(struct xdr_buf *buf, gfp_t gfp) +xdr_alloc_bvec(struct xdr_buf *buf, gfp_t gfp, unsigned int head, unsigned int tail) { - size_t i, n = xdr_buf_pagecount(buf); + size_t i, j = 0, n = xdr_buf_pagecount(buf); - if (n != 0 && buf->bvec == NULL) { - buf->bvec = kmalloc_array(n, sizeof(buf->bvec[0]), gfp); + if (head + n + tail != 0 && buf->bvec == NULL) { + buf->bvec = kmalloc_array(head + n + tail, + sizeof(buf->bvec[0]), gfp); if (!buf->bvec) return -ENOMEM; - for (i = 0; i < n; i++) { - bvec_set_page(&buf->bvec[i], buf->pages[i], PAGE_SIZE, - 0); - } + for (i = 0; i < head; i++) + bvec_set_page(&buf->bvec[j++], NULL, 0, 0); + for (i = 0; i < n; i++) + bvec_set_page(&buf->bvec[j++], buf->pages[i], PAGE_SIZE, 0); + for (i = 0; i < tail; i++) + bvec_set_page(&buf->bvec[j++], NULL, 0, 0); } return 0; } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index adcbedc244d6..fdf67e84b1c7 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -825,7 +825,7 @@ static int xs_stream_nospace(struct rpc_rqst *req, bool vm_wait) static int xs_stream_prepare_request(struct rpc_rqst *req, struct xdr_buf *buf) { - return xdr_alloc_bvec(buf, rpc_task_gfp_mask()); + return xdr_alloc_bvec(buf, rpc_task_gfp_mask(), 0, 0); } /* @@ -954,7 +954,7 @@ static int xs_udp_send_request(struct rpc_rqst *req) if (!xprt_request_get_cong(xprt, req)) return -EBADSLT; - status = xdr_alloc_bvec(xdr, rpc_task_gfp_mask()); + status = xdr_alloc_bvec(xdr, rpc_task_gfp_mask(), 0, 0); if (status < 0) return status; req->rq_xtime = ktime_get(); @@ -2591,7 +2591,7 
@@ static int bc_sendto(struct rpc_rqst *req) int err; req->rq_xtime = ktime_get(); - err = xdr_alloc_bvec(xdr, rpc_task_gfp_mask()); + err = xdr_alloc_bvec(xdr, rpc_task_gfp_mask(), 0, 0); if (err < 0) return err; err = xprt_sock_sendmsg(transport->sock, &msg, xdr, 0, marker, &sent);