> On Feb 16, 2021, at 12:10 PM, Trond Myklebust <trondmy@xxxxxxxxxxxxxxx> wrote: > > On Tue, 2021-02-16 at 16:27 +0000, Chuck Lever wrote: >> Hey Trond- >> >>> On Feb 13, 2021, at 3:25 PM, trondmy@xxxxxxxxxx wrote: >>> >>> From: Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx> >>> >>> Use a counter to keep track of how many requests are queued behind >>> the >>> xprt->xpt_mutex, and keep TCP_CORK set until the queue is empty. >>> >>> Signed-off-by: Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx> >> >> Let's move this forward a little bit. >> >> I'd like to see the previously discussed MSG_MORE simplification >> integrated into this patch. >> > > Hmm... I think I'd prefer to make it incremental to this one. I wasn't > too sure about whether or not removing MSG_SENDPAGE_NOTLAST makes a > difference. > AFAICS, the answer is no, but just in case, let's make it easy to back > this change out. OK. Please send an incremental patch. Thanks! >> In addition to Daire's testing, I've done some testing: >> - No behavior regressions noted >> - No changes in large I/O throughput >> - Slightly shorter RTTs on software build >> >> And the current version of the patch is now in the for-rc branch >> of https://git.kernel.org/pub/scm/linux/kernel/git/cel/linux.git/ >> to get broader testing exposure. >> >> This work is a candidate for a second NFSD PR during the 5.12 >> merge window, along with the other patches currently in the >> for-rc branch. >> >> >>> --- >>> include/linux/sunrpc/svcsock.h | 2 ++ >>> net/sunrpc/svcsock.c | 8 +++++++- >>> 2 files changed, 9 insertions(+), 1 deletion(-) >>> >>> diff --git a/include/linux/sunrpc/svcsock.h >>> b/include/linux/sunrpc/svcsock.h >>> index b7ac7fe68306..bcc555c7ae9c 100644 >>> --- a/include/linux/sunrpc/svcsock.h >>> +++ b/include/linux/sunrpc/svcsock.h >>> @@ -35,6 +35,8 @@ struct svc_sock { >>> /* Total length of the data (not including fragment >>> headers) >>> * received so far in the fragments making up this rpc: */ >>> u32 sk_datalen; >>> + /* Number of queued send requests */ >>> + atomic_t sk_sendqlen; >>> >>> struct page * sk_pages[RPCSVC_MAXPAGES]; /* >>> received data */ >>> }; >>> diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c >>> index 5a809c64dc7b..231f510a4830 100644 >>> --- a/net/sunrpc/svcsock.c >>> +++ b/net/sunrpc/svcsock.c >>> @@ -1171,18 +1171,23 @@ static int svc_tcp_sendto(struct svc_rqst >>> *rqstp) >>> >>> svc_tcp_release_rqst(rqstp); >>> >>> + atomic_inc(&svsk->sk_sendqlen); >>> mutex_lock(&xprt->xpt_mutex); >>> if (svc_xprt_is_dead(xprt)) >>> goto out_notconn; >>> + tcp_sock_set_cork(svsk->sk_sk, true); >>> err = svc_tcp_sendmsg(svsk->sk_sock, &msg, xdr, marker, >>> &sent); >>> xdr_free_bvec(xdr); >>> trace_svcsock_tcp_send(xprt, err < 0 ? err : sent); >>> if (err < 0 || sent != (xdr->len + sizeof(marker))) >>> goto out_close; >>> + if (atomic_dec_and_test(&svsk->sk_sendqlen)) >>> + tcp_sock_set_cork(svsk->sk_sk, false); >>> mutex_unlock(&xprt->xpt_mutex); >>> return sent; >>> >>> out_notconn: >>> + atomic_dec(&svsk->sk_sendqlen); >>> mutex_unlock(&xprt->xpt_mutex); >>> return -ENOTCONN; >>> out_close: >>> @@ -1192,6 +1197,7 @@ static int svc_tcp_sendto(struct svc_rqst >>> *rqstp) >>> (err < 0) ? err : sent, xdr->len); >>> set_bit(XPT_CLOSE, &xprt->xpt_flags); >>> svc_xprt_enqueue(xprt); >>> + atomic_dec(&svsk->sk_sendqlen); >>> mutex_unlock(&xprt->xpt_mutex); >>> return -EAGAIN; >>> } >>> @@ -1261,7 +1267,7 @@ static void svc_tcp_init(struct svc_sock >>> *svsk, struct svc_serv *serv) >>> svsk->sk_datalen = 0; >>> memset(&svsk->sk_pages[0], 0, sizeof(svsk- >>>> sk_pages)); >>> >>> - tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; >>> + tcp_sock_set_nodelay(sk); >>> >>> set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); >>> switch (sk->sk_state) { >>> -- >>> 2.29.2 >>> >> >> -- >> Chuck Lever >> >> >> > > -- > Trond Myklebust > CTO, Hammerspace Inc > 4984 El Camino Real, Suite 208 > Los Altos, CA 94022 > > www.hammer.space -- Chuck Lever