On Fri, 2023-12-08 at 14:19 -0500, Benjamin Coddington wrote: > After commit 59464b262ff5 ("SUNRPC: SOFTCONN tasks should time out > when on > the sending list"), any 4.1 backchannel tasks placed on the sending > queue > would immediately return with -ETIMEDOUT since their req timers are > zero. > We can fix this by keeping a copy of the rpc_clnt's timeout params on > the > transport and using them to properly setup the timeouts on the v4.1 > backchannel tasks' req. > > Fixes: 59464b262ff5 ("SUNRPC: SOFTCONN tasks should time out when on > the sending list") > Signed-off-by: Benjamin Coddington <bcodding@xxxxxxxxxx> > --- > include/linux/sunrpc/xprt.h | 1 + > net/sunrpc/clnt.c | 3 +++ > net/sunrpc/xprt.c | 23 ++++++++++++++--------- > 3 files changed, 18 insertions(+), 9 deletions(-) > > diff --git a/include/linux/sunrpc/xprt.h > b/include/linux/sunrpc/xprt.h > index f85d3a0daca2..7565902053f3 100644 > --- a/include/linux/sunrpc/xprt.h > +++ b/include/linux/sunrpc/xprt.h > @@ -285,6 +285,7 @@ struct rpc_xprt { > * items */ > struct list_head bc_pa_list; /* List of > preallocated > * backchannel > rpc_rqst's */ > + struct rpc_timeout bc_timeout; /* backchannel > timeout params */ > #endif /* CONFIG_SUNRPC_BACKCHANNEL */ > > struct rb_root recv_queue; /* Receive queue */ > diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c > index d6805c1268a7..5891757c88b1 100644 > --- a/net/sunrpc/clnt.c > +++ b/net/sunrpc/clnt.c > @@ -279,6 +279,9 @@ static struct rpc_xprt > *rpc_clnt_set_transport(struct rpc_clnt *clnt, > clnt->cl_autobind = 1; > > clnt->cl_timeout = timeout; > +#if defined(CONFIG_SUNRPC_BACKCHANNEL) > + memcpy(&xprt->bc_timeout, timeout, sizeof(struct > rpc_timeout)); > +#endif Hmm... The xprt can and will be shared among a number of rpc_clnt instances. I therefore think we're better off doing this when we're setting up the back channel. i.e. 
probably doing it in nfs41_init_clientid() after we've picked up the lease time (but before we mark the client as ready), and then doing it in nfs4_proc_bind_conn_to_session() if that ever gets called. Note that we have to set the bc_timeout on all xprts that could act as back channels, so you might want to use rpc_clnt_iterate_for_each_xprt(). It might also be worth looking at Olga's trunking code, since I suspect we might need to do something there when adding a new xprt to the existing set. > rcu_assign_pointer(clnt->cl_xprt, xprt); > spin_unlock(&clnt->cl_lock); > > diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c > index 92301e32cda4..7dce780869fc 100644 > --- a/net/sunrpc/xprt.c > +++ b/net/sunrpc/xprt.c > @@ -653,9 +653,9 @@ static unsigned long > xprt_abs_ktime_to_jiffies(ktime_t abstime) > jiffies + nsecs_to_jiffies(-delta); > } > > -static unsigned long xprt_calc_majortimeo(struct rpc_rqst *req) > +static unsigned long xprt_calc_majortimeo(struct rpc_rqst *req, > + const struct rpc_timeout *to) > { > - const struct rpc_timeout *to = req->rq_task->tk_client- > >cl_timeout; > unsigned long majortimeo = req->rq_timeout; > > if (to->to_exponential) > @@ -667,9 +667,10 @@ static unsigned long xprt_calc_majortimeo(struct > rpc_rqst *req) > return majortimeo; > } > > -static void xprt_reset_majortimeo(struct rpc_rqst *req) > +static void xprt_reset_majortimeo(struct rpc_rqst *req, > + const struct rpc_timeout *to) > { > - req->rq_majortimeo += xprt_calc_majortimeo(req); > + req->rq_majortimeo += xprt_calc_majortimeo(req, to); > } > > static void xprt_reset_minortimeo(struct rpc_rqst *req) > @@ -677,7 +678,8 @@ static void xprt_reset_minortimeo(struct rpc_rqst > *req) > req->rq_minortimeo += req->rq_timeout; > } > > -static void xprt_init_majortimeo(struct rpc_task *task, struct > rpc_rqst *req) > +static void xprt_init_majortimeo(struct rpc_task *task, struct > rpc_rqst *req, > + const struct rpc_timeout *to) > { > unsigned long time_init; > struct rpc_xprt *xprt 
= req->rq_xprt; > @@ -686,8 +688,9 @@ static void xprt_init_majortimeo(struct rpc_task > *task, struct rpc_rqst *req) > time_init = jiffies; > else > time_init = xprt_abs_ktime_to_jiffies(task- > >tk_start); > - req->rq_timeout = task->tk_client->cl_timeout->to_initval; > - req->rq_majortimeo = time_init + xprt_calc_majortimeo(req); > + > + req->rq_timeout = to->to_initval; > + req->rq_majortimeo = time_init + xprt_calc_majortimeo(req, > to); > req->rq_minortimeo = time_init + req->rq_timeout; > } > > @@ -715,7 +718,7 @@ int xprt_adjust_timeout(struct rpc_rqst *req) > } else { > req->rq_timeout = to->to_initval; > req->rq_retries = 0; > - xprt_reset_majortimeo(req); > + xprt_reset_majortimeo(req, to); > /* Reset the RTT counters == "slow start" */ > spin_lock(&xprt->transport_lock); > rpc_init_rtt(req->rq_task->tk_client->cl_rtt, to- > >to_initval); > @@ -1888,7 +1891,7 @@ xprt_request_init(struct rpc_task *task) > req->rq_snd_buf.bvec = NULL; > req->rq_rcv_buf.bvec = NULL; > req->rq_release_snd_buf = NULL; > - xprt_init_majortimeo(task, req); > + xprt_init_majortimeo(task, req, task->tk_client- > >cl_timeout); > > trace_xprt_reserve(req); > } > @@ -1998,6 +2001,8 @@ xprt_init_bc_request(struct rpc_rqst *req, > struct rpc_task *task) > */ > xbufp->len = xbufp->head[0].iov_len + xbufp->page_len + > xbufp->tail[0].iov_len; > + > + xprt_init_majortimeo(task, req, &req->rq_xprt->bc_timeout); > } > #endif > Otherwise, looks good. -- Trond Myklebust Linux NFS client maintainer, Hammerspace trond.myklebust@xxxxxxxxxxxxxxx