commit cddce0116058 ("nbd: Aovid double completion of a request") try to fix that nbd_clear_que() and recv_work() can complete a request concurrently. However, the problem still exists: t1 t2 t3 nbd_disconnect_and_put flush_workqueue recv_work blk_mq_complete_request blk_mq_complete_request_remote -> this is true WRITE_ONCE(rq->state, MQ_RQ_COMPLETE) blk_mq_raise_softirq blk_done_softirq blk_complete_reqs nbd_complete_rq blk_mq_end_request blk_mq_free_request WRITE_ONCE(rq->state, MQ_RQ_IDLE) nbd_clear_que blk_mq_tagset_busy_iter nbd_clear_req __blk_mq_free_request blk_mq_put_tag blk_mq_complete_request There are three places where request can be completed in nbd: recv_work(), nbd_clear_que() and nbd_xmit_timeout(). Since they all hold cmd->lock before completing the request, it's easy to avoid the problem by setting and checking a cmd flag. Signed-off-by: Yu Kuai <yukuai3@xxxxxxxxxx> --- drivers/block/nbd.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 7b9e19675224..4d5098d01758 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -416,12 +416,15 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req); struct nbd_device *nbd = cmd->nbd; struct nbd_config *config; + bool need_complete; if (!mutex_trylock(&cmd->lock)) return BLK_EH_RESET_TIMER; if (!refcount_inc_not_zero(&nbd->config_refs)) { cmd->status = BLK_STS_TIMEOUT; + need_complete = + test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags); mutex_unlock(&cmd->lock); goto done; } @@ -490,11 +493,13 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, dev_err_ratelimited(nbd_to_dev(nbd), "Connection timed out\n"); set_bit(NBD_RT_TIMEDOUT, &config->runtime_flags); cmd->status = BLK_STS_IOERR; + need_complete = test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags); mutex_unlock(&cmd->lock); sock_shutdown(nbd); nbd_config_put(nbd); done: - blk_mq_complete_request(req); + if (need_complete) + blk_mq_complete_request(req); return BLK_EH_DONE; } @@ -849,6 +854,7 @@ static void recv_work(struct work_struct *work) static bool nbd_clear_req(struct request *req, void *data, bool reserved) { struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req); + bool need_complete; /* don't abort one completed request */ if (blk_mq_request_completed(req)) @@ -856,9 +862,11 @@ static bool nbd_clear_req(struct request *req, void *data, bool reserved) mutex_lock(&cmd->lock); cmd->status = BLK_STS_IOERR; + need_complete = test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags); mutex_unlock(&cmd->lock); - blk_mq_complete_request(req); + if (need_complete) + blk_mq_complete_request(req); return true; } -- 2.31.1