Re: Need help debugging NFS issues new to 4.20 kernel

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



>>>>> "BC" == Benjamin Coddington <bcodding@xxxxxxxxxx> writes:

BC> Hmm.. commit c443305529d1d3d3bee0d68fdd14ae89835e091f changed
BC> xs_read_stream_reply() to return recv.copied instead of "ret" to
BC> xprt_complete_rqst()..

BC> You could try reverting that commit and see if the problem goes
BC> away..

So patching a revert of that into 4.20.7 was beyond me, but I received
some help from Jeremy Cline on IRC (in #fedora-kernel) and ended up with
a patch I'll include at the end.  So far it does seem to be better, but
because of secure boot annoyances I haven't been able to roll it out
more generally.  However, it has been stable for a week on a few hosts
which have been problematic with stock 4.20.6.

I will continue to test, but hopefully this helps folks to understand
what's happening.

 - J<

From 322f581f514ffedb8884656f136bd6a812a53714 Mon Sep 17 00:00:00 2001
From: Jeremy Cline <jcline@xxxxxxxxxx>
Date: Fri, 8 Feb 2019 13:09:41 -0500
Subject: [PATCH] Revert "SUNRPC: Fix RPC receive hangs"

This reverts commit c443305529d1d3d3bee0d68fdd14ae89835e091f.

Signed-off-by: Jeremy Cline <jcline@xxxxxxxxxx>
---
 net/sunrpc/xprtsock.c | 39 ++++++++++++++++++++-------------------
 1 file changed, 20 insertions(+), 19 deletions(-)

diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 9cdbb6d6e7f5..2d9f0326d55b 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -417,7 +417,7 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
 		if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
 			goto out;
 		if (ret != want)
-			goto out;
+			goto eagain;
 		seek = 0;
 	} else {
 		seek -= buf->head[0].iov_len;
@@ -439,7 +439,7 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
 		if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
 			goto out;
 		if (ret != want)
-			goto out;
+			goto eagain;
 		seek = 0;
 	} else {
 		seek -= want;
@@ -455,13 +455,16 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
 		if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
 			goto out;
 		if (ret != want)
-			goto out;
+			goto eagain;
 	} else
 		offset += buf->tail[0].iov_len;
 	ret = -EMSGSIZE;
 out:
 	*read = offset - seek_init;
 	return ret;
+eagain:
+	ret = -EAGAIN;
+	goto out;
 sock_err:
 	offset += seek;
 	goto out;
@@ -504,20 +507,21 @@ xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg,
 	if (transport->recv.offset == transport->recv.len) {
 		if (xs_read_stream_request_done(transport))
 			msg->msg_flags |= MSG_EOR;
-		return read;
+		return transport->recv.copied;
 	}
 
 	switch (ret) {
-	default:
-		break;
 	case -EFAULT:
 	case -EMSGSIZE:
 		msg->msg_flags |= MSG_TRUNC;
-		return read;
+		return transport->recv.copied;
 	case 0:
 		return -ESHUTDOWN;
+	default:
+		if (ret < 0)
+			return ret;
 	}
-	return ret < 0 ? ret : read;
+	return -EAGAIN;
 }
 
 static size_t
@@ -556,7 +560,7 @@ xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, int flags)
 
 	ret = xs_read_stream_request(transport, msg, flags, req);
 	if (msg->msg_flags & (MSG_EOR|MSG_TRUNC))
-		xprt_complete_bc_request(req, transport->recv.copied);
+		xprt_complete_bc_request(req, ret);
 
 	return ret;
 }
@@ -589,7 +593,7 @@ xs_read_stream_reply(struct sock_xprt *transport, struct msghdr *msg, int flags)
 
 	spin_lock(&xprt->queue_lock);
 	if (msg->msg_flags & (MSG_EOR|MSG_TRUNC))
-		xprt_complete_rqst(req->rq_task, transport->recv.copied);
+		xprt_complete_rqst(req->rq_task, ret);
 	xprt_unpin_rqst(req);
 out:
 	spin_unlock(&xprt->queue_lock);
@@ -610,8 +614,10 @@ xs_read_stream(struct sock_xprt *transport, int flags)
 		if (ret <= 0)
 			goto out_err;
 		transport->recv.offset = ret;
-		if (transport->recv.offset != want)
-			return transport->recv.offset;
+		if (ret != want) {
+			ret = -EAGAIN;
+			goto out_err;
+		}
 		transport->recv.len = be32_to_cpu(transport->recv.fraghdr) &
 			RPC_FRAGMENT_SIZE_MASK;
 		transport->recv.offset -= sizeof(transport->recv.fraghdr);
@@ -619,9 +625,6 @@ xs_read_stream(struct sock_xprt *transport, int flags)
 	}
 
 	switch (be32_to_cpu(transport->recv.calldir)) {
-	default:
-		msg.msg_flags |= MSG_TRUNC;
-		break;
 	case RPC_CALL:
 		ret = xs_read_stream_call(transport, &msg, flags);
 		break;
@@ -636,8 +639,6 @@ xs_read_stream(struct sock_xprt *transport, int flags)
 		goto out_err;
 	read += ret;
 	if (transport->recv.offset < transport->recv.len) {
-		if (!(msg.msg_flags & MSG_TRUNC))
-			return read;
 		msg.msg_flags = 0;
 		ret = xs_read_discard(transport->sock, &msg, flags,
 				transport->recv.len - transport->recv.offset);
@@ -646,7 +647,7 @@ xs_read_stream(struct sock_xprt *transport, int flags)
 		transport->recv.offset += ret;
 		read += ret;
 		if (transport->recv.offset != transport->recv.len)
-			return read;
+			return -EAGAIN;
 	}
 	if (xs_read_stream_request_done(transport)) {
 		trace_xs_stream_read_request(transport);
@@ -670,7 +671,7 @@ static void xs_stream_data_receive(struct sock_xprt *transport)
 		goto out;
 	for (;;) {
 		ret = xs_read_stream(transport, MSG_DONTWAIT);
-		if (ret < 0)
+		if (ret <= 0)
 			break;
 		read += ret;
 		cond_resched();
-- 
2.20.1




[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux