Hi Ben, On Wed, 2019-10-02 at 07:03 -0400, Benjamin Coddington wrote: > Since commit 4f8943f80883 ("SUNRPC: Replace direct task wakeups from > softirq context") there has been a race to the value of the sk_err if both > XPRT_SOCK_WAKE_ERROR and XPRT_SOCK_WAKE_DISCONNECT are set. In that case, > we may end up losing the sk_err value that existed when xs_error_report was > called. > > Fix this by reverting to the previous behavior: instead of using SO_ERROR > to retrieve the value at a later time (which might also return sk_err_soft), > copy the sk_err value onto struct sock_xprt, and use that value to wake > pending tasks. > > Signed-off-by: Benjamin Coddington <bcodding@xxxxxxxxxx> > Fixes: 4f8943f80883 ("SUNRPC: Replace direct task wakeups from softirq context") > 8<----------------------------------------------------------------------------- > > Changes on V2: > - move xprt_err into a hole in struct sock_xprt > - add an memory barrier to ensure the error is visble to the > error_worker > > --- > include/linux/sunrpc/xprtsock.h | 1 + > net/sunrpc/xprtsock.c | 16 ++++++++-------- > 2 files changed, 9 insertions(+), 8 deletions(-) > > diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h > index 7638dbe7bc50..a940de03808d 100644 > --- a/include/linux/sunrpc/xprtsock.h > +++ b/include/linux/sunrpc/xprtsock.h > @@ -61,6 +61,7 @@ struct sock_xprt { > struct mutex recv_mutex; > struct sockaddr_storage srcaddr; > unsigned short srcport; > + int xprt_err; > > /* > * UDP socket buffer size parameters > diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c > index e2176c167a57..25ff097d6e21 100644 > --- a/net/sunrpc/xprtsock.c > +++ b/net/sunrpc/xprtsock.c > @@ -1250,12 +1250,15 @@ static void xs_error_report(struct sock *sk) > goto out; > > transport = container_of(xprt, struct sock_xprt, xprt); > - err = -sk->sk_err; It looks like "err" is unused after this change, so can you remove the variable declaration as well? Thanks, Anna > - if (err == 0) > + transport->xprt_err = -sk->sk_err; > + if (transport->xprt_err == 0) > goto out; > dprintk("RPC: xs_error_report client %p, error=%d...\n", > - xprt, -err); > - trace_rpc_socket_error(xprt, sk->sk_socket, err); > + xprt, -transport->xprt_err); > + trace_rpc_socket_error(xprt, sk->sk_socket, transport->xprt_err); > + > + /* barrier ensures xprt_err is set before XPRT_SOCK_WAKE_ERROR */ > + smp_mb__before_atomic(); > xs_run_error_worker(transport, XPRT_SOCK_WAKE_ERROR); > out: > read_unlock_bh(&sk->sk_callback_lock); > @@ -2470,7 +2473,6 @@ static void xs_wake_write(struct sock_xprt *transport) > static void xs_wake_error(struct sock_xprt *transport) > { > int sockerr; > - int sockerr_len = sizeof(sockerr); > > if (!test_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state)) > return; > @@ -2479,9 +2481,7 @@ static void xs_wake_error(struct sock_xprt *transport) > goto out; > if (!test_and_clear_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state)) > goto out; > - if (kernel_getsockopt(transport->sock, SOL_SOCKET, SO_ERROR, > - (char *)&sockerr, &sockerr_len) != 0) > - goto out; > + sockerr = xchg(&transport->xprt_err, 0); > if (sockerr < 0) > xprt_wake_pending_tasks(&transport->xprt, sockerr); > out: