Re: [PATCH V2 0/2] block: remove unnecessary RESTART

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, 2017-11-03 at 23:18 +0800, Ming Lei wrote:
> Subject: [PATCH] SCSI_MQ: fix IO hang in case of queue busy
> 
> We have to insert the rq back before checking .device_busy,
> otherwise When IO completes just after the check and before
> this req is added to hctx->dispatch, this queue may never get
> chance to be run, then this IO may hang forever.
> 
> This patch introduces BLK_STS_RESOURCE_OK for handling this
> issue.
> 
> Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx>
> ---
>  block/blk-mq.c            | 17 +++++++++++++++++
>  drivers/scsi/scsi_lib.c   |  8 ++++++++
>  include/linux/blk-mq.h    |  1 +
>  include/linux/blk_types.h |  1 +
>  4 files changed, 27 insertions(+)
> 
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index e4d2490f4e7e..e1e03576edca 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -660,6 +660,16 @@ static void __blk_mq_requeue_request(struct request *rq)
>  	}
>  }
>  
> +void blk_mq_reinsert_request_hctx(struct blk_mq_hw_ctx *hctx, struct request *rq)
> +{
> +	__blk_mq_requeue_request(rq);
> +
> +	spin_lock(&hctx->lock);
> +	list_add_tail(&rq->queuelist, &hctx->dispatch);
> +	spin_unlock(&hctx->lock);
> +}
> +EXPORT_SYMBOL(blk_mq_reinsert_request_hctx);
> +
>  void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list)
>  {
>  	__blk_mq_requeue_request(rq);
> @@ -1165,6 +1175,12 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
>  			list_add(&rq->queuelist, list);
>  			__blk_mq_requeue_request(rq);
>  			break;
> +		} else if (ret == BLK_STS_RESOURCE_OK) {
> +			/*
> +			 * BLK_STS_RESOURCE_OK means driver handled this
> +			 * STS_RESOURCE already, we just need to stop dispatch.
> +			 */
> +			break;
>  		}
>  
>   fail_rq:
> @@ -1656,6 +1672,7 @@ static void __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
>  	ret = q->mq_ops->queue_rq(hctx, &bd);
>  	switch (ret) {
>  	case BLK_STS_OK:
> +	case BLK_STS_RESOURCE_OK:
>  		*cookie = new_cookie;
>  		return;
>  	case BLK_STS_RESOURCE:
> diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
> index 7f218ef61900..0165c1caed82 100644
> --- a/drivers/scsi/scsi_lib.c
> +++ b/drivers/scsi/scsi_lib.c
> @@ -2030,9 +2030,17 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
>  	case BLK_STS_OK:
>  		break;
>  	case BLK_STS_RESOURCE:
> +		/*
> +		 * We have to insert the rq back before checking .device_busy,
> +		 * otherwise when IO completes just after the check and before
> +		 * this req is added to hctx->dispatch, this queue may never get
> +		 * chance to be run, then this IO may hang forever.
> +		 */
> +		blk_mq_reinsert_request_hctx(hctx, req);
>  		if (atomic_read(&sdev->device_busy) == 0 &&
>  		    !scsi_device_blocked(sdev))
>  			blk_mq_delay_run_hw_queue(hctx, SCSI_QUEUE_DELAY);
> +		ret = BLK_STS_RESOURCE_OK;
>  		break;
>  	default:
>  		/*
> diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
> index e5e6becd57d3..4740f643d8c5 100644
> --- a/include/linux/blk-mq.h
> +++ b/include/linux/blk-mq.h
> @@ -244,6 +244,7 @@ void blk_mq_start_request(struct request *rq);
>  void blk_mq_end_request(struct request *rq, blk_status_t error);
>  void __blk_mq_end_request(struct request *rq, blk_status_t error);
>  
> +void blk_mq_reinsert_request_hctx(struct blk_mq_hw_ctx *hctx, struct request *rq);
>  void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list);
>  void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
>  				bool kick_requeue_list);
> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
> index 3385c89f402e..b630cc026a93 100644
> --- a/include/linux/blk_types.h
> +++ b/include/linux/blk_types.h
> @@ -32,6 +32,7 @@ typedef u8 __bitwise blk_status_t;
>  #define BLK_STS_PROTECTION	((__force blk_status_t)8)
>  #define BLK_STS_RESOURCE	((__force blk_status_t)9)
>  #define BLK_STS_IOERR		((__force blk_status_t)10)
> +#define BLK_STS_RESOURCE_OK	((__force blk_status_t)11)
>  
>  /* hack for device mapper, don't use elsewhere: */
>  #define BLK_STS_DM_REQUEUE    ((__force blk_status_t)11)

Running test one with this patch applied on top of Jens' for-next branch yields
the same result as without this patch:
[ ... ]
Error: unloading kernel module ib_srp failed
Test /home/bart/software/infiniband/srp-test/tests/01 failed
# dmesg -c >/dev/null; echo w > /proc/sysrq-trigger; dmesg
[  325.167295] sysrq: SysRq : Show Blocked State
[  325.173612]   task                        PC stack   pid father
[  325.181903] kworker/10:1    D    0   165      2 0x80000000
[  325.189396] Workqueue: srp_remove srp_remove_work [ib_srp]
[  325.196222] Call Trace:
[  325.199622]  __schedule+0x2fa/0xbb0
[  325.204227]  schedule+0x36/0x90
[  325.208390]  async_synchronize_cookie_domain+0x88/0x130
[  325.214915]  ? finish_wait+0x90/0x90
[  325.219574]  async_synchronize_full_domain+0x18/0x20
[  325.225816]  sd_remove+0x4d/0xc0 [sd_mod]
[  325.230949]  device_release_driver_internal+0x160/0x210
[  325.237479]  device_release_driver+0x12/0x20
[  325.242904]  bus_remove_device+0x100/0x180
[  325.248163]  device_del+0x1d8/0x340
[  325.252735]  __scsi_remove_device+0xfc/0x130
[  325.258192]  scsi_forget_host+0x25/0x70
[  325.263173]  scsi_remove_host+0x79/0x120
[  325.268227]  srp_remove_work+0x90/0x1d0 [ib_srp]
[  325.274059]  process_one_work+0x20a/0x660
[  325.279243]  worker_thread+0x3d/0x3b0
[  325.284004]  kthread+0x13a/0x150
[  325.288232]  ? process_one_work+0x660/0x660
[  325.293575]  ? kthread_create_on_node+0x40/0x40
[  325.299334]  ret_from_fork+0x27/0x40
[  325.304002] kworker/5:1     D    0   180      2 0x80000000
[  325.310768] Workqueue: kaluad alua_rtpg_work [scsi_dh_alua]
[  325.317674] Call Trace:
[  325.321047]  __schedule+0x2fa/0xbb0
[  325.325615]  ? trace_hardirqs_on_caller+0xfb/0x190
[  325.331612]  schedule+0x36/0x90
[  325.335797]  schedule_timeout+0x22c/0x570
[  325.340917]  ? call_timer_fn+0x330/0x330
[  325.345972]  ? wait_for_completion_io_timeout+0xf7/0x180
[  325.352558]  io_schedule_timeout+0x1e/0x50
[  325.357808]  ? io_schedule_timeout+0x1e/0x50
[  325.363262]  wait_for_completion_io_timeout+0x11f/0x180
[  325.369793]  ? wake_up_q+0x80/0x80
[  325.374267]  blk_execute_rq+0x86/0xc0
[  325.379012]  scsi_execute+0xdb/0x1f0
[  325.383678]  alua_rtpg_work+0x2b3/0xe8a [scsi_dh_alua]
[  325.390130]  process_one_work+0x20a/0x660
[  325.395818]  worker_thread+0x3d/0x3b0
[  325.401248]  kthread+0x13a/0x150
[  325.406056]  ? process_one_work+0x660/0x660
[  325.412070]  ? kthread_create_on_node+0x40/0x40
[  325.418474]  ret_from_fork+0x27/0x40
[  325.423874] kworker/u66:1   D    0   331      2 0x80000000
[  325.431374] Workqueue: events_unbound async_run_entry_fn
[  325.438651] Call Trace:
[  325.442662]  __schedule+0x2fa/0xbb0
[  325.447835]  ? trace_hardirqs_on_caller+0xfb/0x190
[  325.454522]  schedule+0x36/0x90
[  325.459339]  schedule_timeout+0x22c/0x570
[  325.465097]  ? call_timer_fn+0x330/0x330
[  325.470800]  ? wait_for_completion_io_timeout+0xf7/0x180
[  325.478017]  io_schedule_timeout+0x1e/0x50
[  325.483935]  ? io_schedule_timeout+0x1e/0x50
[  325.490015]  wait_for_completion_io_timeout+0x11f/0x180
[  325.497134]  ? wake_up_q+0x80/0x80
[  325.502272]  blk_execute_rq+0x86/0xc0
[  325.507652]  scsi_execute+0xdb/0x1f0
[  325.512960]  sd_revalidate_disk+0xed/0x1c70 [sd_mod]
[  325.519846]  sd_probe_async+0xc3/0x1d0 [sd_mod]
[  325.526199]  async_run_entry_fn+0x38/0x160
[  325.532121]  process_one_work+0x20a/0x660
[  325.537897]  worker_thread+0x3d/0x3b0
[  325.543319]  kthread+0x13a/0x150
[  325.548200]  ? process_one_work+0x660/0x660
[  325.554204]  ? kthread_create_on_node+0x40/0x40
[  325.560605]  ret_from_fork+0x27/0x40




[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux