Re: [PATCH V5 3/4] scsi: ufs: Fix device and host reset methods

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Tested-by: Dolev Raviv <draviv@xxxxxxxxxxxxxx>

> As of now SCSI initiated error handling is broken because,
> the reset APIs don't try to bring back the device initialized and
> ready for further transfers.
>
> In case of timeouts, the scsi error handler takes care of handling aborts
> and resets. Improve the error handling in such scenario by resetting the
> device and host and re-initializing them in proper manner.
>
> Signed-off-by: Sujit Reddy Thumma <sthumma@xxxxxxxxxxxxxx>
> ---
>  drivers/scsi/ufs/ufshcd.c |  240
> +++++++++++++++++++++++++++++++++++----------
>  drivers/scsi/ufs/ufshcd.h |    2 +
>  2 files changed, 189 insertions(+), 53 deletions(-)
>
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
> index d4ee48d..c577e6e 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -69,9 +69,14 @@ enum {
>
>  /* UFSHCD states */
>  enum {
> -	UFSHCD_STATE_OPERATIONAL,
>  	UFSHCD_STATE_RESET,
>  	UFSHCD_STATE_ERROR,
> +	UFSHCD_STATE_OPERATIONAL,
> +};
> +
> +/* UFSHCD error handling flags */
> +enum {
> +	UFSHCD_EH_IN_PROGRESS = (1 << 0),
>  };
>
>  /* Interrupt configuration options */
> @@ -87,6 +92,16 @@ enum {
>  	INT_AGGR_CONFIG,
>  };
>
> +#define ufshcd_set_eh_in_progress(h) \
> +	(h->eh_flags |= UFSHCD_EH_IN_PROGRESS)
> +#define ufshcd_eh_in_progress(h) \
> +	(h->eh_flags & UFSHCD_EH_IN_PROGRESS)
> +#define ufshcd_clear_eh_in_progress(h) \
> +	(h->eh_flags &= ~UFSHCD_EH_IN_PROGRESS)
> +
> +static void ufshcd_tmc_handler(struct ufs_hba *hba);
> +static void ufshcd_async_scan(void *data, async_cookie_t cookie);
> +
>  /*
>   * ufshcd_wait_for_register - wait for register value to change
>   * @hba - per-adapter interface
> @@ -840,10 +855,25 @@ static int ufshcd_queuecommand(struct Scsi_Host
> *host, struct scsi_cmnd *cmd)
>
>  	tag = cmd->request->tag;
>
> -	if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL) {
> +	spin_lock_irqsave(hba->host->host_lock, flags);
> +	switch (hba->ufshcd_state) {
> +	case UFSHCD_STATE_OPERATIONAL:
> +		break;
> +	case UFSHCD_STATE_RESET:
>  		err = SCSI_MLQUEUE_HOST_BUSY;
> -		goto out;
> +		goto out_unlock;
> +	case UFSHCD_STATE_ERROR:
> +		set_host_byte(cmd, DID_ERROR);
> +		cmd->scsi_done(cmd);
> +		goto out_unlock;
> +	default:
> +		dev_WARN_ONCE(hba->dev, 1, "%s: invalid state %d\n",
> +				__func__, hba->ufshcd_state);
> +		set_host_byte(cmd, DID_BAD_TARGET);
> +		cmd->scsi_done(cmd);
> +		goto out_unlock;
>  	}
> +	spin_unlock_irqrestore(hba->host->host_lock, flags);
>
>  	/* acquire the tag to make sure device cmds don't use it */
>  	if (test_and_set_bit_lock(tag, &hba->lrb_in_use)) {
> @@ -880,6 +910,7 @@ static int ufshcd_queuecommand(struct Scsi_Host *host,
> struct scsi_cmnd *cmd)
>  	/* issue command to the controller */
>  	spin_lock_irqsave(hba->host->host_lock, flags);
>  	ufshcd_send_command(hba, tag);
> +out_unlock:
>  	spin_unlock_irqrestore(hba->host->host_lock, flags);
>  out:
>  	return err;
> @@ -1495,8 +1526,6 @@ static int ufshcd_make_hba_operational(struct
> ufs_hba *hba)
>  	if (hba->ufshcd_state == UFSHCD_STATE_RESET)
>  		scsi_unblock_requests(hba->host);
>
> -	hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
> -
>  out:
>  	return err;
>  }
> @@ -2245,8 +2274,12 @@ static void ufshcd_err_handler(struct ufs_hba *hba)
>  	}
>  	return;
>  fatal_eh:
> -	hba->ufshcd_state = UFSHCD_STATE_ERROR;
> -	schedule_work(&hba->feh_workq);
> +	/* handle fatal errors only when link is functional */
> +	if (hba->ufshcd_state == UFSHCD_STATE_OPERATIONAL) {
> +		/* block commands at driver layer until error is handled */
> +		hba->ufshcd_state = UFSHCD_STATE_ERROR;
> +		schedule_work(&hba->feh_workq);
> +	}
>  }
>
>  /**
> @@ -2411,12 +2444,13 @@ static int ufshcd_issue_tm_cmd(struct ufs_hba
> *hba, int lun_id, int task_id,
>  }
>
>  /**
> - * ufshcd_device_reset - reset device and abort all the pending commands
> + * ufshcd_eh_device_reset_handler - device reset handler registered to
> + *                                    scsi layer.
>   * @cmd: SCSI command pointer
>   *
>   * Returns SUCCESS/FAILED
>   */
> -static int ufshcd_device_reset(struct scsi_cmnd *cmd)
> +static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd)
>  {
>  	struct Scsi_Host *host;
>  	struct ufs_hba *hba;
> @@ -2425,6 +2459,7 @@ static int ufshcd_device_reset(struct scsi_cmnd
> *cmd)
>  	int err;
>  	u8 resp = 0xF;
>  	struct ufshcd_lrb *lrbp;
> +	unsigned long flags;
>
>  	host = cmd->device->host;
>  	hba = shost_priv(host);
> @@ -2433,55 +2468,33 @@ static int ufshcd_device_reset(struct scsi_cmnd
> *cmd)
>  	lrbp = &hba->lrb[tag];
>  	err = ufshcd_issue_tm_cmd(hba, lrbp->lun, 0, UFS_LOGICAL_RESET, &resp);
>  	if (err || resp != UPIU_TASK_MANAGEMENT_FUNC_COMPL) {
> -		err = FAILED;
> +		if (!err)
> +			err = resp;
>  		goto out;
> -	} else {
> -		err = SUCCESS;
>  	}
>
> -	for (pos = 0; pos < hba->nutrs; pos++) {
> -		if (test_bit(pos, &hba->outstanding_reqs) &&
> -		    (hba->lrb[tag].lun == hba->lrb[pos].lun)) {
> -
> -			/* clear the respective UTRLCLR register bit */
> -			ufshcd_utrl_clear(hba, pos);
> -
> -			clear_bit(pos, &hba->outstanding_reqs);
> -
> -			if (hba->lrb[pos].cmd) {
> -				scsi_dma_unmap(hba->lrb[pos].cmd);
> -				hba->lrb[pos].cmd->result =
> -					DID_ABORT << 16;
> -				hba->lrb[pos].cmd->scsi_done(cmd);
> -				hba->lrb[pos].cmd = NULL;
> -				clear_bit_unlock(pos, &hba->lrb_in_use);
> -				wake_up(&hba->dev_cmd.tag_wq);
> -			}
> +	/* clear the commands that were pending for corresponding LUN */
> +	for_each_set_bit(pos, &hba->outstanding_reqs, hba->nutrs) {
> +		if (hba->lrb[pos].lun == lrbp->lun) {
> +			err = ufshcd_clear_cmd(hba, pos);
> +			if (err)
> +				break;
>  		}
> -	} /* end of for */
> +	}
> +	spin_lock_irqsave(host->host_lock, flags);
> +	ufshcd_transfer_req_compl(hba);
> +	spin_unlock_irqrestore(host->host_lock, flags);
>  out:
> +	if (!err) {
> +		err = SUCCESS;
> +	} else {
> +		dev_err(hba->dev, "%s: failed with err %d\n", __func__, err);
> +		err = FAILED;
> +	}
>  	return err;
>  }
>
>  /**
> - * ufshcd_host_reset - Main reset function registered with scsi layer
> - * @cmd: SCSI command pointer
> - *
> - * Returns SUCCESS/FAILED
> - */
> -static int ufshcd_host_reset(struct scsi_cmnd *cmd)
> -{
> -	struct ufs_hba *hba;
> -
> -	hba = shost_priv(cmd->device->host);
> -
> -	if (hba->ufshcd_state == UFSHCD_STATE_RESET)
> -		return SUCCESS;
> -
> -	return ufshcd_do_reset(hba);
> -}
> -
> -/**
>   * ufshcd_abort - abort a specific command
>   * @cmd: SCSI command pointer
>   *
> @@ -2579,6 +2592,122 @@ out:
>  }
>
>  /**
> + * ufshcd_host_reset_and_restore - reset and restore host controller
> + * @hba: per-adapter instance
> + *
> + * Note that host controller reset may issue DME_RESET to
> + * local and remote (device) Uni-Pro stack and the attributes
> + * are reset to default state.
> + *
> + * Returns zero on success, non-zero on failure
> + */
> +static int ufshcd_host_reset_and_restore(struct ufs_hba *hba)
> +{
> +	int err;
> +	async_cookie_t cookie;
> +	unsigned long flags;
> +
> +	/* Reset the host controller */
> +	spin_lock_irqsave(hba->host->host_lock, flags);
> +	ufshcd_hba_stop(hba);
> +	spin_unlock_irqrestore(hba->host->host_lock, flags);
> +
> +	err = ufshcd_hba_enable(hba);
> +	if (err)
> +		goto out;
> +
> +	/* Establish the link again and restore the device */
> +	cookie = async_schedule(ufshcd_async_scan, hba);
> +	/* wait for async scan to be completed */
> +	async_synchronize_cookie(++cookie);
> +	if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL)
> +		err = -EIO;
> +out:
> +	if (err)
> +		dev_err(hba->dev, "%s: Host init failed %d\n", __func__, err);
> +
> +	return err;
> +}
> +
> +/**
> + * ufshcd_reset_and_restore - reset and re-initialize host/device
> + * @hba: per-adapter instance
> + *
> + * Reset and recover device, host and re-establish link. This
> + * is helpful to recover the communication in fatal error conditions.
> + *
> + * Returns zero on success, non-zero on failure
> + */
> +static int ufshcd_reset_and_restore(struct ufs_hba *hba)
> +{
> +	int err = 0;
> +	unsigned long flags;
> +
> +	err = ufshcd_host_reset_and_restore(hba);
> +
> +	/*
> +	 * After reset the door-bell might be cleared, complete
> +	 * outstanding requests in s/w here.
> +	 */
> +	spin_lock_irqsave(hba->host->host_lock, flags);
> +	ufshcd_transfer_req_compl(hba);
> +	ufshcd_tmc_handler(hba);
> +	spin_unlock_irqrestore(hba->host->host_lock, flags);
> +
> +	return err;
> +}
> +
> +/**
> + * ufshcd_eh_host_reset_handler - host reset handler registered to scsi
> layer
> + * @cmd - SCSI command pointer
> + *
> + * Returns SUCCESS/FAILED
> + */
> +static int ufshcd_eh_host_reset_handler(struct scsi_cmnd *cmd)
> +{
> +	int err;
> +	unsigned long flags;
> +	struct ufs_hba *hba;
> +
> +	hba = shost_priv(cmd->device->host);
> +
> +	/*
> +	 * Check if there is any race with fatal error handling.
> +	 * If so, wait for it to complete. Even though fatal error
> +	 * handling does reset and restore in some cases, don't assume
> +	 * anything out of it. We are just avoiding race here.
> +	 */
> +	do {
> +		spin_lock_irqsave(hba->host->host_lock, flags);
> +		if (!(work_pending(&hba->feh_workq) ||
> +				hba->ufshcd_state == UFSHCD_STATE_RESET))
> +			break;
> +		spin_unlock_irqrestore(hba->host->host_lock, flags);
> +		dev_dbg(hba->dev, "%s: reset in progress\n", __func__);
> +		flush_work_sync(&hba->feh_workq);
> +	} while (1);
> +
> +	hba->ufshcd_state = UFSHCD_STATE_RESET;
> +	ufshcd_set_eh_in_progress(hba);
> +	spin_unlock_irqrestore(hba->host->host_lock, flags);
> +
> +	err = ufshcd_reset_and_restore(hba);
> +
> +	spin_lock_irqsave(hba->host->host_lock, flags);
> +	if (!err) {
> +		err = SUCCESS;
> +		hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
> +	} else {
> +		err = FAILED;
> +		hba->ufshcd_state = UFSHCD_STATE_ERROR;
> +	}
> +	ufshcd_clear_eh_in_progress(hba);
> +	spin_unlock_irqrestore(hba->host->host_lock, flags);
> +
> +	return err;
> +}
> +
> +/**
>   * ufshcd_async_scan - asynchronous execution for link startup
>   * @data: data pointer to pass to this function
>   * @cookie: cookie data
> @@ -2601,8 +2730,13 @@ static void ufshcd_async_scan(void *data,
> async_cookie_t cookie)
>  		goto out;
>
>  	ufshcd_force_reset_auto_bkops(hba);
> -	scsi_scan_host(hba->host);
> -	pm_runtime_put_sync(hba->dev);
> +	hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
> +
> +	/* If we are in error handling context no need to scan the host */
> +	if (!ufshcd_eh_in_progress(hba)) {
> +		scsi_scan_host(hba->host);
> +		pm_runtime_put_sync(hba->dev);
> +	}
>  out:
>  	return;
>  }
> @@ -2615,8 +2749,8 @@ static struct scsi_host_template
> ufshcd_driver_template = {
>  	.slave_alloc		= ufshcd_slave_alloc,
>  	.slave_destroy		= ufshcd_slave_destroy,
>  	.eh_abort_handler	= ufshcd_abort,
> -	.eh_device_reset_handler = ufshcd_device_reset,
> -	.eh_host_reset_handler	= ufshcd_host_reset,
> +	.eh_device_reset_handler = ufshcd_eh_device_reset_handler,
> +	.eh_host_reset_handler   = ufshcd_eh_host_reset_handler,
>  	.this_id		= -1,
>  	.sg_tablesize		= SG_ALL,
>  	.cmd_per_lun		= UFSHCD_CMD_PER_LUN,
> diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
> index fe7c947..1e0585c 100644
> --- a/drivers/scsi/ufs/ufshcd.h
> +++ b/drivers/scsi/ufs/ufshcd.h
> @@ -179,6 +179,7 @@ struct ufs_dev_cmd {
>   * @tm_condition: condition variable for task management
>   * @tm_slots_in_use: bit map of task management request slots in use
>   * @ufshcd_state: UFSHCD states
> + * @eh_flags: Error handling flags
>   * @intr_mask: Interrupt Mask Bits
>   * @ee_ctrl_mask: Exception event control mask
>   * @feh_workq: Work queue for fatal controller error handling
> @@ -224,6 +225,7 @@ struct ufs_hba {
>  	unsigned long tm_slots_in_use;
>
>  	u32 ufshcd_state;
> +	u32 eh_flags;
>  	u32 intr_mask;
>  	u16 ee_ctrl_mask;
>
> --
> QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
> of Code Aurora Forum, hosted by The Linux Foundation.
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>


-- 
QUALCOMM ISRAEL, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [SCSI Target Devel]     [Linux SCSI Target Infrastructure]     [Kernel Newbies]     [IDE]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux ATA RAID]     [Linux IIO]     [Samba]     [Device Mapper]
  Powered by Linux