On Wed, May 23, 2018 at 02:19:30PM +0200, Christoph Hellwig wrote:
> NVMe always completes the request before returning from ->timeout, either
> by polling for it, or by disabling the controller. Return BLK_EH_DONE so
> that the block layer doesn't even try to complete it again.
>
> Signed-off-by: Christoph Hellwig <hch@xxxxxx>
> ---
>  drivers/nvme/host/pci.c    | 14 +++++---------
>  drivers/nvme/host/rdma.c   |  2 +-
>  drivers/nvme/target/loop.c |  2 +-
>  3 files changed, 7 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
> index 917e1714f7d9..31525324b79f 100644
> --- a/drivers/nvme/host/pci.c
> +++ b/drivers/nvme/host/pci.c
> @@ -1205,7 +1205,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
>                  nvme_warn_reset(dev, csts);
>                  nvme_dev_disable(dev, false);
>                  nvme_reset_ctrl(&dev->ctrl);
> -                return BLK_EH_HANDLED;
> +                return BLK_EH_DONE;
>          }
>
>          /*
> @@ -1215,14 +1215,14 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
>                  dev_warn(dev->ctrl.device,
>                           "I/O %d QID %d timeout, completion polled\n",
>                           req->tag, nvmeq->qid);
> -                return BLK_EH_HANDLED;
> +                return BLK_EH_DONE;
>          }
>
>          /*
>           * Shutdown immediately if controller times out while starting. The
>           * reset work will see the pci device disabled when it gets the forced
>           * cancellation error. All outstanding requests are completed on
> -         * shutdown, so we return BLK_EH_HANDLED.
> +         * shutdown, so we return BLK_EH_DONE.
>           */
>          switch (dev->ctrl.state) {
>          case NVME_CTRL_CONNECTING:
> @@ -1232,7 +1232,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
>                           req->tag, nvmeq->qid);
>                  nvme_dev_disable(dev, false);
>                  nvme_req(req)->flags |= NVME_REQ_CANCELLED;
> -                return BLK_EH_HANDLED;
> +                return BLK_EH_DONE;
>          default:
>                  break;
>          }
> @@ -1249,12 +1249,8 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
>                  nvme_dev_disable(dev, false);
>                  nvme_reset_ctrl(&dev->ctrl);
>
> -                /*
> -                 * Mark the request as handled, since the inline shutdown
> -                 * forces all outstanding requests to complete.
> -                 */
>                  nvme_req(req)->flags |= NVME_REQ_CANCELLED;
> -                return BLK_EH_HANDLED;
> +                return BLK_EH_DONE;
>          }
>
>          if (atomic_dec_return(&dev->ctrl.abort_limit) < 0) {
> diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
> index 1eb4438a8763..ac7462cd7f0f 100644
> --- a/drivers/nvme/host/rdma.c
> +++ b/drivers/nvme/host/rdma.c
> @@ -1598,7 +1598,7 @@ nvme_rdma_timeout(struct request *rq, bool reserved)
>          /* fail with DNR on cmd timeout */
>          nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR;
>
> -        return BLK_EH_HANDLED;
> +        return BLK_EH_DONE;
>  }
>
>  static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
> diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
> index 27a8561c0cb9..22e3627bf16b 100644
> --- a/drivers/nvme/target/loop.c
> +++ b/drivers/nvme/target/loop.c
> @@ -146,7 +146,7 @@ nvme_loop_timeout(struct request *rq, bool reserved)
>          /* fail with DNR on admin cmd timeout */
>          nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR;
>
> -        return BLK_EH_HANDLED;
> +        return BLK_EH_DONE;
>  }
>
>  static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
> --
> 2.17.0
>

This change should have been done after '[PATCH 13/14] blk-mq: Remove
generation seqeunce', otherwise the timed-out request won't be completed
by nvme_cancel_request() at all, because the request is always marked
as 'COMPLETE' before .timeout() is called.

Thanks,
Ming
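
For illustration, the ordering concern above can be sketched as a small
standalone C program. Everything in it is a made-up model, not kernel code:
fake_request, fake_complete, fake_cancel, fake_nvme_timeout and
fake_timeout_path are hypothetical names that only mimic the relevant
behaviour of blk_mq_complete_request(), nvme_cancel_request() and the
pre-patch-13 timeout core (which marks the request COMPLETE before invoking
->timeout(), so a later completion attempt is ignored).

/*
 * Hypothetical standalone model of the ordering issue; not kernel code.
 */
#include <stdio.h>

enum rq_state { RQ_IN_FLIGHT, RQ_COMPLETE, RQ_FREED };
enum eh_ret   { EH_DONE, EH_HANDLED };  /* stand-ins for BLK_EH_DONE/BLK_EH_HANDLED */

struct fake_request {
        enum rq_state state;
};

/* Models blk_mq_complete_request(): a no-op once the request is COMPLETE. */
static void fake_complete(struct fake_request *rq)
{
        if (rq->state != RQ_IN_FLIGHT) {
                printf("completion ignored: request already marked COMPLETE\n");
                return;
        }
        rq->state = RQ_FREED;
        printf("request completed\n");
}

/* Models nvme_dev_disable() cancelling the request via nvme_cancel_request(). */
static void fake_cancel(struct fake_request *rq)
{
        fake_complete(rq);
}

/* Models the NVMe ->timeout() after this patch: inline shutdown, return DONE. */
static enum eh_ret fake_nvme_timeout(struct fake_request *rq)
{
        fake_cancel(rq);
        return EH_DONE;                 /* "driver already completed the request" */
}

/* Models the old timeout core: COMPLETE is set before ->timeout() runs. */
static void fake_timeout_path(struct fake_request *rq)
{
        rq->state = RQ_COMPLETE;        /* done before calling ->timeout() */

        switch (fake_nvme_timeout(rq)) {
        case EH_HANDLED:
                rq->state = RQ_FREED;   /* old contract: the core finishes it */
                printf("core completed the request (BLK_EH_HANDLED path)\n");
                break;
        case EH_DONE:
                break;                  /* new contract: the core does nothing */
        }
}

int main(void)
{
        struct fake_request rq = { .state = RQ_IN_FLIGHT };

        fake_timeout_path(&rq);
        printf("final state: %s\n",
               rq.state == RQ_FREED ? "freed" : "stuck, never completed");
        return 0;
}

Running this model prints "completion ignored" followed by "stuck, never
completed": with the old core still pre-marking the request COMPLETE,
returning BLK_EH_DONE leaves nobody to finish it. That is the situation
Ming points out; applying patch 13/14 first removes the pre-marking, so
the completion issued from nvme_cancel_request() actually goes through.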