RE: [PATCH V5 3/4] scsi: ufs: Fix device and host reset methods

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Reviewed-by: Yaniv Gardi <ygardi@xxxxxxxxxxxxxx>

QUALCOMM ISRAEL, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation


= > -----Original Message-----
= > From: linux-scsi-owner@xxxxxxxxxxxxxxx [mailto:linux-scsi-
= > owner@xxxxxxxxxxxxxxx] On Behalf Of Dolev Raviv
= > Sent: Monday, August 12, 2013 4:02 PM
= > To: Sujit Reddy Thumma
= > Cc: Vinayak Holikatti; Santosh Y; James E.J. Bottomley; linux-
= > scsi@xxxxxxxxxxxxxxx; Sujit Reddy Thumma; linux-arm-
= > msm@xxxxxxxxxxxxxxx
= > Subject: Re: [PATCH V5 3/4] scsi: ufs: Fix device and host reset methods
= > 
= > Tested-by: Dolev Raviv <draviv@xxxxxxxxxxxxxx>
= > 
= > > As of now SCSI initiated error handling is broken because, the reset
= > > APIs don't try to bring back the device initialized and ready for
= > > further transfers.
= > >
= > > In case of timeouts, the scsi error handler takes care of handling
= > > aborts and resets. Improve the error handling in such scenario by
= > > resetting the device and host and re-initializing them in proper
manner.
= > >
= > > Signed-off-by: Sujit Reddy Thumma <sthumma@xxxxxxxxxxxxxx>
= > > ---
= > >  drivers/scsi/ufs/ufshcd.c |  240
= > > +++++++++++++++++++++++++++++++++++----------
= > >  drivers/scsi/ufs/ufshcd.h |    2 +
= > >  2 files changed, 189 insertions(+), 53 deletions(-)
= > >
= > > diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
= > > index d4ee48d..c577e6e 100644
= > > --- a/drivers/scsi/ufs/ufshcd.c
= > > +++ b/drivers/scsi/ufs/ufshcd.c
= > > @@ -69,9 +69,14 @@ enum {
= > >
= > >  /* UFSHCD states */
= > >  enum {
= > > -	UFSHCD_STATE_OPERATIONAL,
= > >  	UFSHCD_STATE_RESET,
= > >  	UFSHCD_STATE_ERROR,
= > > +	UFSHCD_STATE_OPERATIONAL,
= > > +};
= > > +
= > > +/* UFSHCD error handling flags */
= > > +enum {
= > > +	UFSHCD_EH_IN_PROGRESS = (1 << 0),
= > >  };
= > >
= > >  /* Interrupt configuration options */ @@ -87,6 +92,16 @@ enum {
= > >  	INT_AGGR_CONFIG,
= > >  };
= > >
= > > +#define ufshcd_set_eh_in_progress(h) \
= > > +	(h->eh_flags |= UFSHCD_EH_IN_PROGRESS) #define
= > > +ufshcd_eh_in_progress(h) \
= > > +	(h->eh_flags & UFSHCD_EH_IN_PROGRESS) #define
= > > +ufshcd_clear_eh_in_progress(h) \
= > > +	(h->eh_flags &= ~UFSHCD_EH_IN_PROGRESS)
= > > +
= > > +static void ufshcd_tmc_handler(struct ufs_hba *hba); static void
= > > +ufshcd_async_scan(void *data, async_cookie_t cookie);
= > > +
= > >  /*
= > >   * ufshcd_wait_for_register - wait for register value to change
= > >   * @hba - per-adapter interface
= > > @@ -840,10 +855,25 @@ static int ufshcd_queuecommand(struct
= > Scsi_Host
= > > *host, struct scsi_cmnd *cmd)
= > >
= > >  	tag = cmd->request->tag;
= > >
= > > -	if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL) {
= > > +	spin_lock_irqsave(hba->host->host_lock, flags);
= > > +	switch (hba->ufshcd_state) {
= > > +	case UFSHCD_STATE_OPERATIONAL:
= > > +		break;
= > > +	case UFSHCD_STATE_RESET:
= > >  		err = SCSI_MLQUEUE_HOST_BUSY;
= > > -		goto out;
= > > +		goto out_unlock;
= > > +	case UFSHCD_STATE_ERROR:
= > > +		set_host_byte(cmd, DID_ERROR);
= > > +		cmd->scsi_done(cmd);
= > > +		goto out_unlock;
= > > +	default:
= > > +		dev_WARN_ONCE(hba->dev, 1, "%s: invalid state %d\n",
= > > +				__func__, hba->ufshcd_state);
= > > +		set_host_byte(cmd, DID_BAD_TARGET);
= > > +		cmd->scsi_done(cmd);
= > > +		goto out_unlock;
= > >  	}
= > > +	spin_unlock_irqrestore(hba->host->host_lock, flags);
= > >
= > >  	/* acquire the tag to make sure device cmds don't use it */
= > >  	if (test_and_set_bit_lock(tag, &hba->lrb_in_use)) { @@ -880,6
= > +910,7
= > > @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct
= > > scsi_cmnd *cmd)
= > >  	/* issue command to the controller */
= > >  	spin_lock_irqsave(hba->host->host_lock, flags);
= > >  	ufshcd_send_command(hba, tag);
= > > +out_unlock:
= > >  	spin_unlock_irqrestore(hba->host->host_lock, flags);
= > >  out:
= > >  	return err;
= > > @@ -1495,8 +1526,6 @@ static int
= > ufshcd_make_hba_operational(struct
= > > ufs_hba *hba)
= > >  	if (hba->ufshcd_state == UFSHCD_STATE_RESET)
= > >  		scsi_unblock_requests(hba->host);
= > >
= > > -	hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
= > > -
= > >  out:
= > >  	return err;
= > >  }
= > > @@ -2245,8 +2274,12 @@ static void ufshcd_err_handler(struct ufs_hba
= > *hba)
= > >  	}
= > >  	return;
= > >  fatal_eh:
= > > -	hba->ufshcd_state = UFSHCD_STATE_ERROR;
= > > -	schedule_work(&hba->feh_workq);
= > > +	/* handle fatal errors only when link is functional */
= > > +	if (hba->ufshcd_state == UFSHCD_STATE_OPERATIONAL) {
= > > +		/* block commands at driver layer until error is handled */
= > > +		hba->ufshcd_state = UFSHCD_STATE_ERROR;
= > > +		schedule_work(&hba->feh_workq);
= > > +	}
= > >  }
= > >
= > >  /**
= > > @@ -2411,12 +2444,13 @@ static int ufshcd_issue_tm_cmd(struct
= > ufs_hba
= > > *hba, int lun_id, int task_id,  }
= > >
= > >  /**
= > > - * ufshcd_device_reset - reset device and abort all the pending
= > > commands
= > > + * ufshcd_eh_device_reset_handler - device reset handler registered
to
= > > + *                                    scsi layer.
= > >   * @cmd: SCSI command pointer
= > >   *
= > >   * Returns SUCCESS/FAILED
= > >   */
= > > -static int ufshcd_device_reset(struct scsi_cmnd *cmd)
= > > +static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd)
= > >  {
= > >  	struct Scsi_Host *host;
= > >  	struct ufs_hba *hba;
= > > @@ -2425,6 +2459,7 @@ static int ufshcd_device_reset(struct
= > scsi_cmnd
= > > *cmd)
= > >  	int err;
= > >  	u8 resp = 0xF;
= > >  	struct ufshcd_lrb *lrbp;
= > > +	unsigned long flags;
= > >
= > >  	host = cmd->device->host;
= > >  	hba = shost_priv(host);
= > > @@ -2433,55 +2468,33 @@ static int ufshcd_device_reset(struct
= > > scsi_cmnd
= > > *cmd)
= > >  	lrbp = &hba->lrb[tag];
= > >  	err = ufshcd_issue_tm_cmd(hba, lrbp->lun, 0,
= > UFS_LOGICAL_RESET, &resp);
= > >  	if (err || resp != UPIU_TASK_MANAGEMENT_FUNC_COMPL) {
= > > -		err = FAILED;
= > > +		if (!err)
= > > +			err = resp;
= > >  		goto out;
= > > -	} else {
= > > -		err = SUCCESS;
= > >  	}
= > >
= > > -	for (pos = 0; pos < hba->nutrs; pos++) {
= > > -		if (test_bit(pos, &hba->outstanding_reqs) &&
= > > -		    (hba->lrb[tag].lun == hba->lrb[pos].lun)) {
= > > -
= > > -			/* clear the respective UTRLCLR register bit */
= > > -			ufshcd_utrl_clear(hba, pos);
= > > -
= > > -			clear_bit(pos, &hba->outstanding_reqs);
= > > -
= > > -			if (hba->lrb[pos].cmd) {
= > > -				scsi_dma_unmap(hba->lrb[pos].cmd);
= > > -				hba->lrb[pos].cmd->result =
= > > -					DID_ABORT << 16;
= > > -				hba->lrb[pos].cmd->scsi_done(cmd);
= > > -				hba->lrb[pos].cmd = NULL;
= > > -				clear_bit_unlock(pos, &hba->lrb_in_use);
= > > -				wake_up(&hba->dev_cmd.tag_wq);
= > > -			}
= > > +	/* clear the commands that were pending for corresponding LUN
= > */
= > > +	for_each_set_bit(pos, &hba->outstanding_reqs, hba->nutrs) {
= > > +		if (hba->lrb[pos].lun == lrbp->lun) {
= > > +			err = ufshcd_clear_cmd(hba, pos);
= > > +			if (err)
= > > +				break;
= > >  		}
= > > -	} /* end of for */
= > > +	}
= > > +	spin_lock_irqsave(host->host_lock, flags);
= > > +	ufshcd_transfer_req_compl(hba);
= > > +	spin_unlock_irqrestore(host->host_lock, flags);
= > >  out:
= > > +	if (!err) {
= > > +		err = SUCCESS;
= > > +	} else {
= > > +		dev_err(hba->dev, "%s: failed with err %d\n", __func__,
= > err);
= > > +		err = FAILED;
= > > +	}
= > >  	return err;
= > >  }
= > >
= > >  /**
= > > - * ufshcd_host_reset - Main reset function registered with scsi layer
= > > - * @cmd: SCSI command pointer
= > > - *
= > > - * Returns SUCCESS/FAILED
= > > - */
= > > -static int ufshcd_host_reset(struct scsi_cmnd *cmd) -{
= > > -	struct ufs_hba *hba;
= > > -
= > > -	hba = shost_priv(cmd->device->host);
= > > -
= > > -	if (hba->ufshcd_state == UFSHCD_STATE_RESET)
= > > -		return SUCCESS;
= > > -
= > > -	return ufshcd_do_reset(hba);
= > > -}
= > > -
= > > -/**
= > >   * ufshcd_abort - abort a specific command
= > >   * @cmd: SCSI command pointer
= > >   *
= > > @@ -2579,6 +2592,122 @@ out:
= > >  }
= > >
= > >  /**
= > > + * ufshcd_host_reset_and_restore - reset and restore host controller
= > > + * @hba: per-adapter instance
= > > + *
= > > + * Note that host controller reset may issue DME_RESET to
= > > + * local and remote (device) Uni-Pro stack and the attributes
= > > + * are reset to default state.
= > > + *
= > > + * Returns zero on success, non-zero on failure  */ static int
= > > +ufshcd_host_reset_and_restore(struct ufs_hba *hba) {
= > > +	int err;
= > > +	async_cookie_t cookie;
= > > +	unsigned long flags;
= > > +
= > > +	/* Reset the host controller */
= > > +	spin_lock_irqsave(hba->host->host_lock, flags);
= > > +	ufshcd_hba_stop(hba);
= > > +	spin_unlock_irqrestore(hba->host->host_lock, flags);
= > > +
= > > +	err = ufshcd_hba_enable(hba);
= > > +	if (err)
= > > +		goto out;
= > > +
= > > +	/* Establish the link again and restore the device */
= > > +	cookie = async_schedule(ufshcd_async_scan, hba);
= > > +	/* wait for async scan to be completed */
= > > +	async_synchronize_cookie(++cookie);
= > > +	if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL)
= > > +		err = -EIO;
= > > +out:
= > > +	if (err)
= > > +		dev_err(hba->dev, "%s: Host init failed %d\n", __func__,
= > err);
= > > +
= > > +	return err;
= > > +}
= > > +
= > > +/**
= > > + * ufshcd_reset_and_restore - reset and re-initialize host/device
= > > + * @hba: per-adapter instance
= > > + *
= > > + * Reset and recover device, host and re-establish link. This
= > > + * is helpful to recover the communication in fatal error conditions.
= > > + *
= > > + * Returns zero on success, non-zero on failure  */ static int
= > > +ufshcd_reset_and_restore(struct ufs_hba *hba) {
= > > +	int err = 0;
= > > +	unsigned long flags;
= > > +
= > > +	err = ufshcd_host_reset_and_restore(hba);
= > > +
= > > +	/*
= > > +	 * After reset the door-bell might be cleared, complete
= > > +	 * outstanding requests in s/w here.
= > > +	 */
= > > +	spin_lock_irqsave(hba->host->host_lock, flags);
= > > +	ufshcd_transfer_req_compl(hba);
= > > +	ufshcd_tmc_handler(hba);
= > > +	spin_unlock_irqrestore(hba->host->host_lock, flags);
= > > +
= > > +	return err;
= > > +}
= > > +
= > > +/**
= > > + * ufshcd_eh_host_reset_handler - host reset handler registered to
= > > +scsi
= > > layer
= > > + * @cmd - SCSI command pointer
= > > + *
= > > + * Returns SUCCESS/FAILED
= > > + */
= > > +static int ufshcd_eh_host_reset_handler(struct scsi_cmnd *cmd) {
= > > +	int err;
= > > +	unsigned long flags;
= > > +	struct ufs_hba *hba;
= > > +
= > > +	hba = shost_priv(cmd->device->host);
= > > +
= > > +	/*
= > > +	 * Check if there is any race with fatal error handling.
= > > +	 * If so, wait for it to complete. Even though fatal error
= > > +	 * handling does reset and restore in some cases, don't assume
= > > +	 * anything out of it. We are just avoiding race here.
= > > +	 */
= > > +	do {
= > > +		spin_lock_irqsave(hba->host->host_lock, flags);
= > > +		if (!(work_pending(&hba->feh_workq) ||
= > > +				hba->ufshcd_state ==
= > UFSHCD_STATE_RESET))
= > > +			break;
= > > +		spin_unlock_irqrestore(hba->host->host_lock, flags);
= > > +		dev_dbg(hba->dev, "%s: reset in progress\n", __func__);
= > > +		flush_work_sync(&hba->feh_workq);
= > > +	} while (1);
= > > +
= > > +	hba->ufshcd_state = UFSHCD_STATE_RESET;
= > > +	ufshcd_set_eh_in_progress(hba);
= > > +	spin_unlock_irqrestore(hba->host->host_lock, flags);
= > > +
= > > +	err = ufshcd_reset_and_restore(hba);
= > > +
= > > +	spin_lock_irqsave(hba->host->host_lock, flags);
= > > +	if (!err) {
= > > +		err = SUCCESS;
= > > +		hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
= > > +	} else {
= > > +		err = FAILED;
= > > +		hba->ufshcd_state = UFSHCD_STATE_ERROR;
= > > +	}
= > > +	ufshcd_clear_eh_in_progress(hba);
= > > +	spin_unlock_irqrestore(hba->host->host_lock, flags);
= > > +
= > > +	return err;
= > > +}
= > > +
= > > +/**
= > >   * ufshcd_async_scan - asynchronous execution for link startup
= > >   * @data: data pointer to pass to this function
= > >   * @cookie: cookie data
= > > @@ -2601,8 +2730,13 @@ static void ufshcd_async_scan(void *data,
= > > async_cookie_t cookie)
= > >  		goto out;
= > >
= > >  	ufshcd_force_reset_auto_bkops(hba);
= > > -	scsi_scan_host(hba->host);
= > > -	pm_runtime_put_sync(hba->dev);
= > > +	hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
= > > +
= > > +	/* If we are in error handling context no need to scan the host */
= > > +	if (!ufshcd_eh_in_progress(hba)) {
= > > +		scsi_scan_host(hba->host);
= > > +		pm_runtime_put_sync(hba->dev);
= > > +	}
= > >  out:
= > >  	return;
= > >  }
= > > @@ -2615,8 +2749,8 @@ static struct scsi_host_template
= > > ufshcd_driver_template = {
= > >  	.slave_alloc		= ufshcd_slave_alloc,
= > >  	.slave_destroy		= ufshcd_slave_destroy,
= > >  	.eh_abort_handler	= ufshcd_abort,
= > > -	.eh_device_reset_handler = ufshcd_device_reset,
= > > -	.eh_host_reset_handler	= ufshcd_host_reset,
= > > +	.eh_device_reset_handler = ufshcd_eh_device_reset_handler,
= > > +	.eh_host_reset_handler   = ufshcd_eh_host_reset_handler,
= > >  	.this_id		= -1,
= > >  	.sg_tablesize		= SG_ALL,
= > >  	.cmd_per_lun		= UFSHCD_CMD_PER_LUN,
= > > diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
= > > index fe7c947..1e0585c 100644
= > > --- a/drivers/scsi/ufs/ufshcd.h
= > > +++ b/drivers/scsi/ufs/ufshcd.h
= > > @@ -179,6 +179,7 @@ struct ufs_dev_cmd {
= > >   * @tm_condition: condition variable for task management
= > >   * @tm_slots_in_use: bit map of task management request slots in use
= > >   * @ufshcd_state: UFSHCD states
= > > + * @eh_flags: Error handling flags
= > >   * @intr_mask: Interrupt Mask Bits
= > >   * @ee_ctrl_mask: Exception event control mask
= > >   * @feh_workq: Work queue for fatal controller error handling @@
= > > -224,6 +225,7 @@ struct ufs_hba {
= > >  	unsigned long tm_slots_in_use;
= > >
= > >  	u32 ufshcd_state;
= > > +	u32 eh_flags;
= > >  	u32 intr_mask;
= > >  	u16 ee_ctrl_mask;
= > >
= > > --
= > > QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a
= > > member of Code Aurora Forum, hosted by The Linux Foundation.
= > >
= > > --
= > > To unsubscribe from this list: send the line "unsubscribe linux-scsi"
= > > in the body of a message to majordomo@xxxxxxxxxxxxxxx More
= > majordomo
= > > info at  http://vger.kernel.org/majordomo-info.html
= > >
= > 
= > 
= > --
= > QUALCOMM ISRAEL, on behalf of Qualcomm Innovation Center, Inc. is a
= > member of Code Aurora Forum, hosted by The Linux Foundation
= > 
= > --
= > To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the
= > body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info
= > at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line "unsubscribe linux-arm-msm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [Linux for Sparc]     [IETF Annouce]     [Security]     [Bugtraq]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux