On Wed, 2023-12-13 at 14:49 -0500, Benjamin Coddington wrote: > After commit 59464b262ff5 ("SUNRPC: SOFTCONN tasks should time out when on > the sending list"), any 4.1 backchannel tasks placed on the sending queue > would immediately return with -ETIMEDOUT since their req timers are zero. > > Initialize the backchannel's rpc_rqst timeout parameters from the xprt's > default timeout settings. > > Fixes: 59464b262ff5 ("SUNRPC: SOFTCONN tasks should time out when on the sending list") > Signed-off-by: Benjamin Coddington <bcodding@xxxxxxxxxx> > --- > net/sunrpc/xprt.c | 23 ++++++++++++++--------- > 1 file changed, 14 insertions(+), 9 deletions(-) > > diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c > index 2364c485540c..6cc9ffac962d 100644 > --- a/net/sunrpc/xprt.c > +++ b/net/sunrpc/xprt.c > @@ -651,9 +651,9 @@ static unsigned long xprt_abs_ktime_to_jiffies(ktime_t abstime) > jiffies + nsecs_to_jiffies(-delta); > } > > -static unsigned long xprt_calc_majortimeo(struct rpc_rqst *req) > +static unsigned long xprt_calc_majortimeo(struct rpc_rqst *req, > + const struct rpc_timeout *to) > { > - const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout; > unsigned long majortimeo = req->rq_timeout; > > if (to->to_exponential) > @@ -665,9 +665,10 @@ static unsigned long xprt_calc_majortimeo(struct rpc_rqst *req) > return majortimeo; > } > > -static void xprt_reset_majortimeo(struct rpc_rqst *req) > +static void xprt_reset_majortimeo(struct rpc_rqst *req, > + const struct rpc_timeout *to) > { > - req->rq_majortimeo += xprt_calc_majortimeo(req); > + req->rq_majortimeo += xprt_calc_majortimeo(req, to); > } > > static void xprt_reset_minortimeo(struct rpc_rqst *req) > @@ -675,7 +676,8 @@ static void xprt_reset_minortimeo(struct rpc_rqst *req) > req->rq_minortimeo += req->rq_timeout; > } > > -static void xprt_init_majortimeo(struct rpc_task *task, struct rpc_rqst *req) > +static void xprt_init_majortimeo(struct rpc_task *task, struct rpc_rqst *req, > + const struct 
rpc_timeout *to) > { > unsigned long time_init; > struct rpc_xprt *xprt = req->rq_xprt; > @@ -684,8 +686,9 @@ static void xprt_init_majortimeo(struct rpc_task *task, struct rpc_rqst *req) > time_init = jiffies; > else > time_init = xprt_abs_ktime_to_jiffies(task->tk_start); > - req->rq_timeout = task->tk_client->cl_timeout->to_initval; > - req->rq_majortimeo = time_init + xprt_calc_majortimeo(req); > + > + req->rq_timeout = to->to_initval; > + req->rq_majortimeo = time_init + xprt_calc_majortimeo(req, to); > req->rq_minortimeo = time_init + req->rq_timeout; > } > > @@ -713,7 +716,7 @@ int xprt_adjust_timeout(struct rpc_rqst *req) > } else { > req->rq_timeout = to->to_initval; > req->rq_retries = 0; > - xprt_reset_majortimeo(req); > + xprt_reset_majortimeo(req, to); > /* Reset the RTT counters == "slow start" */ > spin_lock(&xprt->transport_lock); > rpc_init_rtt(req->rq_task->tk_client->cl_rtt, to->to_initval); > @@ -1886,7 +1889,7 @@ xprt_request_init(struct rpc_task *task) > req->rq_snd_buf.bvec = NULL; > req->rq_rcv_buf.bvec = NULL; > req->rq_release_snd_buf = NULL; > - xprt_init_majortimeo(task, req); > + xprt_init_majortimeo(task, req, task->tk_client->cl_timeout); > > trace_xprt_reserve(req); > } > @@ -1996,6 +1999,8 @@ xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task) > */ > xbufp->len = xbufp->head[0].iov_len + xbufp->page_len + > xbufp->tail[0].iov_len; > + > + xprt_init_majortimeo(task, req, req->rq_xprt->timeout); > } > #endif > As I mentioned in the email thread here, I've been seeing some hangs with v6.7-rc8 kernels when testing TLS support: https://lore.kernel.org/linux-nfs/8C3DFB5D-B967-4D59-BFC5-7B25315DB9AB@xxxxxxxxxx/T/#t With this patchset in place, I've been unable to reproduce the hangs. I'm not sure whether this is the right way to fix the problem, but it does seem to be a real problem, and I have a semi-reliable way to reproduce it if you need me to test a fix. Cheers, -- Jeff Layton <jlayton@xxxxxxxxxx>