Reviewed-by: Yaniv Gardi <ygardi@xxxxxxxxxxxxxx> QUALCOMM ISRAEL, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation = > -----Original Message----- = > From: linux-scsi-owner@xxxxxxxxxxxxxxx [mailto:linux-scsi- = > owner@xxxxxxxxxxxxxxx] On Behalf Of Dolev Raviv = > Sent: Monday, August 12, 2013 4:02 PM = > To: Sujit Reddy Thumma = > Cc: Vinayak Holikatti; Santosh Y; James E.J. Bottomley; linux- = > scsi@xxxxxxxxxxxxxxx; Sujit Reddy Thumma; linux-arm- = > msm@xxxxxxxxxxxxxxx = > Subject: Re: [PATCH V5 3/4] scsi: ufs: Fix device and host reset methods = > = > Tested-by: Dolev Raviv <draviv@xxxxxxxxxxxxxx> = > = > > As of now SCSI initiated error handling is broken because, the reset = > > APIs don't try to bring back the device initialized and ready for = > > further transfers. = > > = > > In case of timeouts, the scsi error handler takes care of handling = > > aborts and resets. Improve the error handling in such scenario by = > > resetting the device and host and re-initializing them in proper manner. = > > = > > Signed-off-by: Sujit Reddy Thumma <sthumma@xxxxxxxxxxxxxx> = > > --- = > > drivers/scsi/ufs/ufshcd.c | 240 = > > +++++++++++++++++++++++++++++++++++---------- = > > drivers/scsi/ufs/ufshcd.h | 2 + = > > 2 files changed, 189 insertions(+), 53 deletions(-) = > > = > > diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c = > > index d4ee48d..c577e6e 100644 = > > --- a/drivers/scsi/ufs/ufshcd.c = > > +++ b/drivers/scsi/ufs/ufshcd.c = > > @@ -69,9 +69,14 @@ enum { = > > = > > /* UFSHCD states */ = > > enum { = > > - UFSHCD_STATE_OPERATIONAL, = > > UFSHCD_STATE_RESET, = > > UFSHCD_STATE_ERROR, = > > + UFSHCD_STATE_OPERATIONAL, = > > +}; = > > + = > > +/* UFSHCD error handling flags */ = > > +enum { = > > + UFSHCD_EH_IN_PROGRESS = (1 << 0), = > > }; = > > = > > /* Interrupt configuration options */ @@ -87,6 +92,16 @@ enum { = > > INT_AGGR_CONFIG, = > > }; = > > = > > +#define ufshcd_set_eh_in_progress(h) \ = > > + (h->eh_flags |= UFSHCD_EH_IN_PROGRESS) #define = > > +ufshcd_eh_in_progress(h) \ = > > + (h->eh_flags & UFSHCD_EH_IN_PROGRESS) #define = > > +ufshcd_clear_eh_in_progress(h) \ = > > + (h->eh_flags &= ~UFSHCD_EH_IN_PROGRESS) = > > + = > > +static void ufshcd_tmc_handler(struct ufs_hba *hba); static void = > > +ufshcd_async_scan(void *data, async_cookie_t cookie); = > > + = > > /* = > > * ufshcd_wait_for_register - wait for register value to change = > > * @hba - per-adapter interface = > > @@ -840,10 +855,25 @@ static int ufshcd_queuecommand(struct = > Scsi_Host = > > *host, struct scsi_cmnd *cmd) = > > = > > tag = cmd->request->tag; = > > = > > - if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL) { = > > + spin_lock_irqsave(hba->host->host_lock, flags); = > > + switch (hba->ufshcd_state) { = > > + case UFSHCD_STATE_OPERATIONAL: = > > + break; = > > + case UFSHCD_STATE_RESET: = > > err = SCSI_MLQUEUE_HOST_BUSY; = > > - goto out; = > > + goto out_unlock; = > > + case UFSHCD_STATE_ERROR: = > > + set_host_byte(cmd, DID_ERROR); = > > + cmd->scsi_done(cmd); = > > + goto out_unlock; = > > + default: = > > + dev_WARN_ONCE(hba->dev, 1, "%s: invalid state %d\n", = > > + __func__, hba->ufshcd_state); = > > + set_host_byte(cmd, DID_BAD_TARGET); = > > + cmd->scsi_done(cmd); = > > + goto out_unlock; = > > } = > > + spin_unlock_irqrestore(hba->host->host_lock, flags); = > > = > > /* acquire the tag to make sure device cmds don't use it */ = > > if (test_and_set_bit_lock(tag, &hba->lrb_in_use)) { @@ -880,6 = > +910,7 = > > @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct = > > scsi_cmnd *cmd) = > > /* issue command to the controller */ = > > spin_lock_irqsave(hba->host->host_lock, flags); = > > ufshcd_send_command(hba, tag); = > > +out_unlock: = > > spin_unlock_irqrestore(hba->host->host_lock, flags); = > > out: = > > return err; = > > @@ -1495,8 +1526,6 @@ static int = > ufshcd_make_hba_operational(struct = > > ufs_hba *hba) = > > if (hba->ufshcd_state == UFSHCD_STATE_RESET) = > > scsi_unblock_requests(hba->host); = > > = > > - hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL; = > > - = > > out: = > > return err; = > > } = > > @@ -2245,8 +2274,12 @@ static void ufshcd_err_handler(struct ufs_hba = > *hba) = > > } = > > return; = > > fatal_eh: = > > - hba->ufshcd_state = UFSHCD_STATE_ERROR; = > > - schedule_work(&hba->feh_workq); = > > + /* handle fatal errors only when link is functional */ = > > + if (hba->ufshcd_state == UFSHCD_STATE_OPERATIONAL) { = > > + /* block commands at driver layer until error is handled */ = > > + hba->ufshcd_state = UFSHCD_STATE_ERROR; = > > + schedule_work(&hba->feh_workq); = > > + } = > > } = > > = > > /** = > > @@ -2411,12 +2444,13 @@ static int ufshcd_issue_tm_cmd(struct = > ufs_hba = > > *hba, int lun_id, int task_id, } = > > = > > /** = > > - * ufshcd_device_reset - reset device and abort all the pending = > > commands = > > + * ufshcd_eh_device_reset_handler - device reset handler registered to = > > + * scsi layer. = > > * @cmd: SCSI command pointer = > > * = > > * Returns SUCCESS/FAILED = > > */ = > > -static int ufshcd_device_reset(struct scsi_cmnd *cmd) = > > +static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd) = > > { = > > struct Scsi_Host *host; = > > struct ufs_hba *hba; = > > @@ -2425,6 +2459,7 @@ static int ufshcd_device_reset(struct = > scsi_cmnd = > > *cmd) = > > int err; = > > u8 resp = 0xF; = > > struct ufshcd_lrb *lrbp; = > > + unsigned long flags; = > > = > > host = cmd->device->host; = > > hba = shost_priv(host); = > > @@ -2433,55 +2468,33 @@ static int ufshcd_device_reset(struct = > > scsi_cmnd = > > *cmd) = > > lrbp = &hba->lrb[tag]; = > > err = ufshcd_issue_tm_cmd(hba, lrbp->lun, 0, = > UFS_LOGICAL_RESET, &resp); = > > if (err || resp != UPIU_TASK_MANAGEMENT_FUNC_COMPL) { = > > - err = FAILED; = > > + if (!err) = > > + err = resp; = > > goto out; = > > - } else { = > > - err = SUCCESS; = > > } = > > = > > - for (pos = 0; pos < hba->nutrs; pos++) { = > > - if (test_bit(pos, &hba->outstanding_reqs) && = > > - (hba->lrb[tag].lun == hba->lrb[pos].lun)) { = > > - = > > - /* clear the respective UTRLCLR register bit */ = > > - ufshcd_utrl_clear(hba, pos); = > > - = > > - clear_bit(pos, &hba->outstanding_reqs); = > > - = > > - if (hba->lrb[pos].cmd) { = > > - scsi_dma_unmap(hba->lrb[pos].cmd); = > > - hba->lrb[pos].cmd->result = = > > - DID_ABORT << 16; = > > - hba->lrb[pos].cmd->scsi_done(cmd); = > > - hba->lrb[pos].cmd = NULL; = > > - clear_bit_unlock(pos, &hba->lrb_in_use); = > > - wake_up(&hba->dev_cmd.tag_wq); = > > - } = > > + /* clear the commands that were pending for corresponding LUN = > */ = > > + for_each_set_bit(pos, &hba->outstanding_reqs, hba->nutrs) { = > > + if (hba->lrb[pos].lun == lrbp->lun) { = > > + err = ufshcd_clear_cmd(hba, pos); = > > + if (err) = > > + break; = > > } = > > - } /* end of for */ = > > + } = > > + spin_lock_irqsave(host->host_lock, flags); = > > + ufshcd_transfer_req_compl(hba); = > > + spin_unlock_irqrestore(host->host_lock, flags); = > > out: = > > + if (!err) { = > > + err = SUCCESS; = > > + } else { = > > + dev_err(hba->dev, "%s: failed with err %d\n", __func__, = > err); = > > + err = FAILED; = > > + } = > > return err; = > > } = > > = > > /** = > > - * ufshcd_host_reset - Main reset function registered with scsi layer = > > - * @cmd: SCSI command pointer = > > - * = > > - * Returns SUCCESS/FAILED = > > - */ = > > -static int ufshcd_host_reset(struct scsi_cmnd *cmd) -{ = > > - struct ufs_hba *hba; = > > - = > > - hba = shost_priv(cmd->device->host); = > > - = > > - if (hba->ufshcd_state == UFSHCD_STATE_RESET) = > > - return SUCCESS; = > > - = > > - return ufshcd_do_reset(hba); = > > -} = > > - = > > -/** = > > * ufshcd_abort - abort a specific command = > > * @cmd: SCSI command pointer = > > * = > > @@ -2579,6 +2592,122 @@ out: = > > } = > > = > > /** = > > + * ufshcd_host_reset_and_restore - reset and restore host controller = > > + * @hba: per-adapter instance = > > + * = > > + * Note that host controller reset may issue DME_RESET to = > > + * local and remote (device) Uni-Pro stack and the attributes = > > + * are reset to default state. = > > + * = > > + * Returns zero on success, non-zero on failure */ static int = > > +ufshcd_host_reset_and_restore(struct ufs_hba *hba) { = > > + int err; = > > + async_cookie_t cookie; = > > + unsigned long flags; = > > + = > > + /* Reset the host controller */ = > > + spin_lock_irqsave(hba->host->host_lock, flags); = > > + ufshcd_hba_stop(hba); = > > + spin_unlock_irqrestore(hba->host->host_lock, flags); = > > + = > > + err = ufshcd_hba_enable(hba); = > > + if (err) = > > + goto out; = > > + = > > + /* Establish the link again and restore the device */ = > > + cookie = async_schedule(ufshcd_async_scan, hba); = > > + /* wait for async scan to be completed */ = > > + async_synchronize_cookie(++cookie); = > > + if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL) = > > + err = -EIO; = > > +out: = > > + if (err) = > > + dev_err(hba->dev, "%s: Host init failed %d\n", __func__, = > err); = > > + = > > + return err; = > > +} = > > + = > > +/** = > > + * ufshcd_reset_and_restore - reset and re-initialize host/device = > > + * @hba: per-adapter instance = > > + * = > > + * Reset and recover device, host and re-establish link. This = > > + * is helpful to recover the communication in fatal error conditions. = > > + * = > > + * Returns zero on success, non-zero on failure */ static int = > > +ufshcd_reset_and_restore(struct ufs_hba *hba) { = > > + int err = 0; = > > + unsigned long flags; = > > + = > > + err = ufshcd_host_reset_and_restore(hba); = > > + = > > + /* = > > + * After reset the door-bell might be cleared, complete = > > + * outstanding requests in s/w here. = > > + */ = > > + spin_lock_irqsave(hba->host->host_lock, flags); = > > + ufshcd_transfer_req_compl(hba); = > > + ufshcd_tmc_handler(hba); = > > + spin_unlock_irqrestore(hba->host->host_lock, flags); = > > + = > > + return err; = > > +} = > > + = > > +/** = > > + * ufshcd_eh_host_reset_handler - host reset handler registered to = > > +scsi = > > layer = > > + * @cmd - SCSI command pointer = > > + * = > > + * Returns SUCCESS/FAILED = > > + */ = > > +static int ufshcd_eh_host_reset_handler(struct scsi_cmnd *cmd) { = > > + int err; = > > + unsigned long flags; = > > + struct ufs_hba *hba; = > > + = > > + hba = shost_priv(cmd->device->host); = > > + = > > + /* = > > + * Check if there is any race with fatal error handling. = > > + * If so, wait for it to complete. Even though fatal error = > > + * handling does reset and restore in some cases, don't assume = > > + * anything out of it. We are just avoiding race here. = > > + */ = > > + do { = > > + spin_lock_irqsave(hba->host->host_lock, flags); = > > + if (!(work_pending(&hba->feh_workq) || = > > + hba->ufshcd_state == = > UFSHCD_STATE_RESET)) = > > + break; = > > + spin_unlock_irqrestore(hba->host->host_lock, flags); = > > + dev_dbg(hba->dev, "%s: reset in progress\n", __func__); = > > + flush_work_sync(&hba->feh_workq); = > > + } while (1); = > > + = > > + hba->ufshcd_state = UFSHCD_STATE_RESET; = > > + ufshcd_set_eh_in_progress(hba); = > > + spin_unlock_irqrestore(hba->host->host_lock, flags); = > > + = > > + err = ufshcd_reset_and_restore(hba); = > > + = > > + spin_lock_irqsave(hba->host->host_lock, flags); = > > + if (!err) { = > > + err = SUCCESS; = > > + hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL; = > > + } else { = > > + err = FAILED; = > > + hba->ufshcd_state = UFSHCD_STATE_ERROR; = > > + } = > > + ufshcd_clear_eh_in_progress(hba); = > > + spin_unlock_irqrestore(hba->host->host_lock, flags); = > > + = > > + return err; = > > +} = > > + = > > +/** = > > * ufshcd_async_scan - asynchronous execution for link startup = > > * @data: data pointer to pass to this function = > > * @cookie: cookie data = > > @@ -2601,8 +2730,13 @@ static void ufshcd_async_scan(void *data, = > > async_cookie_t cookie) = > > goto out; = > > = > > ufshcd_force_reset_auto_bkops(hba); = > > - scsi_scan_host(hba->host); = > > - pm_runtime_put_sync(hba->dev); = > > + hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL; = > > + = > > + /* If we are in error handling context no need to scan the host */ = > > + if (!ufshcd_eh_in_progress(hba)) { = > > + scsi_scan_host(hba->host); = > > + pm_runtime_put_sync(hba->dev); = > > + } = > > out: = > > return; = > > } = > > @@ -2615,8 +2749,8 @@ static struct scsi_host_template = > > ufshcd_driver_template = { = > > .slave_alloc = ufshcd_slave_alloc, = > > .slave_destroy = ufshcd_slave_destroy, = > > .eh_abort_handler = ufshcd_abort, = > > - .eh_device_reset_handler = ufshcd_device_reset, = > > - .eh_host_reset_handler = ufshcd_host_reset, = > > + .eh_device_reset_handler = ufshcd_eh_device_reset_handler, = > > + .eh_host_reset_handler = ufshcd_eh_host_reset_handler, = > > .this_id = -1, = > > .sg_tablesize = SG_ALL, = > > .cmd_per_lun = UFSHCD_CMD_PER_LUN, = > > diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h = > > index fe7c947..1e0585c 100644 = > > --- a/drivers/scsi/ufs/ufshcd.h = > > +++ b/drivers/scsi/ufs/ufshcd.h = > > @@ -179,6 +179,7 @@ struct ufs_dev_cmd { = > > * @tm_condition: condition variable for task management = > > * @tm_slots_in_use: bit map of task management request slots in use = > > * @ufshcd_state: UFSHCD states = > > + * @eh_flags: Error handling flags = > > * @intr_mask: Interrupt Mask Bits = > > * @ee_ctrl_mask: Exception event control mask = > > * @feh_workq: Work queue for fatal controller error handling @@ = > > -224,6 +225,7 @@ struct ufs_hba { = > > unsigned long tm_slots_in_use; = > > = > > u32 ufshcd_state; = > > + u32 eh_flags; = > > u32 intr_mask; = > > u16 ee_ctrl_mask; = > > = > > -- = > > QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a = > > member of Code Aurora Forum, hosted by The Linux Foundation. = > > = > > -- = > > To unsubscribe from this list: send the line "unsubscribe linux-scsi" = > > in the body of a message to majordomo@xxxxxxxxxxxxxxx More = > majordomo = > > info at http://vger.kernel.org/majordomo-info.html = > > = > = > = > -- = > QUALCOMM ISRAEL, on behalf of Qualcomm Innovation Center, Inc. is a = > member of Code Aurora Forum, hosted by The Linux Foundation = > = > -- = > To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the = > body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info = > at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-arm-msm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html