Re: [PATCH] scsi: ufs: Fix a race between the interrupt handler and the reset handler

Adrian Hunter <adrian.hunter@xxxxxxxxx> · Mon, 13 Jun 2022 09:28:39 +0300

On 11/06/22 02:29, Bart Van Assche wrote:
> Prevent the interrupt handler and the reset handler from both trying to
> complete the same request at the same time. This patch is the result of
> the analysis of the following crash:
> 
> Unable to handle kernel NULL pointer dereference at virtual address 0000000000000120
> CPU: 0 PID: 0 Comm: swapper/0 Tainted: G           OE     5.10.107-android13-4-00051-g1e48e8970cca-ab8664745 #1
> pc : ufshcd_release_scsi_cmd+0x30/0x46c
> lr : __ufshcd_transfer_req_compl+0x4fc/0x9c0
> Call trace:
>  ufshcd_release_scsi_cmd+0x30/0x46c
>  __ufshcd_transfer_req_compl+0x4fc/0x9c0
>  ufshcd_poll+0xf0/0x208
>  ufshcd_sl_intr+0xb8/0xf0
>  ufshcd_intr+0x168/0x2f4
>  __handle_irq_event_percpu+0xa0/0x30c
>  handle_irq_event+0x84/0x178
>  handle_fasteoi_irq+0x150/0x2e8
>  __handle_domain_irq+0x114/0x1e4
>  gic_handle_irq.31846+0x58/0x300
>  el1_irq+0xe4/0x1c0
>  cpuidle_enter_state+0x3ac/0x8c4
>  do_idle+0x2fc/0x55c
>  cpu_startup_entry+0x84/0x90
>  kernel_init+0x0/0x310
>  start_kernel+0x0/0x608
>  start_kernel+0x4ec/0x608
> 
> Signed-off-by: Bart Van Assche <bvanassche@xxxxxxx>
> ---
>  drivers/scsi/ufs/ufshcd.c | 20 +++++++++++++-------
>  1 file changed, 13 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
> index 1fb3a8b9b03e..279691ff3562 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -6966,6 +6966,7 @@ int ufshcd_exec_raw_upiu_cmd(struct ufs_hba *hba,
>   */
>  static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd)
>  {
> +	unsigned long flags, completed_reqs = 0;
>  	struct Scsi_Host *host;
>  	struct ufs_hba *hba;
>  	u32 pos;
> @@ -6984,13 +6985,18 @@ static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd)
>  	}
>  
>  	/* clear the commands that were pending for corresponding LUN */
> -	for_each_set_bit(pos, &hba->outstanding_reqs, hba->nutrs) {
> -		if (hba->lrb[pos].lun == lun) {
> -			err = ufshcd_clear_cmd(hba, pos);
> -			if (err)
> -				break;
> -			__ufshcd_transfer_req_compl(hba, 1U << pos);
> -		}
> +	spin_lock_irqsave(&hba->outstanding_lock, flags);
> +	for_each_set_bit(pos, &hba->outstanding_reqs, hba->nutrs)
> +		if (hba->lrb[pos].lun == lun)
> +			__set_bit(pos, &completed_reqs);
> +	hba->outstanding_reqs &= ~completed_reqs;
> +	spin_unlock_irqrestore(&hba->outstanding_lock, flags);
> +
> +	for_each_set_bit(pos, &completed_reqs, hba->nutrs) {
> +		err = ufshcd_clear_cmd(hba, pos);
> +		if (err)
> +			break;

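Having cleared the bit in hba->outstanding_reqs, shouldn't we
always complete the request? I.e., we should not 'break' here:
otherwise the remaining requests in completed_reqs, which are no
longer set in hba->outstanding_reqs, are skipped by this loop and
never reach __ufshcd_transfer_req_compl().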

> +		__ufshcd_transfer_req_compl(hba, 1U << pos);
>  	}
>  
>  out: