Hi Brian, Thanks for reviewing. All good suggestions that I am fine with implementing. As these are fairly minor, would you be okay with these being made in a separate ‘fix' patch series? -matt > On Aug 17, 2015, at 9:38 AM, Brian King <brking@xxxxxxxxxxxxxxxxxx> wrote: > > On 08/13/2015 09:47 PM, Matthew R. Ochs wrote: >> --- a/drivers/scsi/cxlflash/common.h >> +++ b/drivers/scsi/cxlflash/common.h >> @@ -76,6 +76,12 @@ enum cxlflash_init_state { >> INIT_STATE_SCSI >> }; >> >> +enum cxlflash_state { >> + STATE_NORMAL, /* Normal running state, everything good */ >> + STATE_LIMBO, /* Limbo running state, trying to reset/recover */ > > Might be more clear to call this STATE_RESETTING or STATE_RECOVERY > >> + STATE_FAILTERM /* Failed/terminating state, error out users/threads */ >> +}; >> + >> /* >> * Each context has its own set of resource handles that is visible >> * only from that context. > >> @@ -105,7 +109,8 @@ struct cxlflash_cfg { >> >> wait_queue_head_t tmf_waitq; >> bool tmf_active; >> - u8 err_recovery_active:1; >> + wait_queue_head_t limbo_waitq; > > How about reset_waitq instead? > >> + enum cxlflash_state state; >> }; >> >> struct afu_cmd { > >> @@ -455,9 +471,21 @@ static int cxlflash_eh_device_reset_handler(struct scsi_cmnd *scp) >> get_unaligned_be32(&((u32 *)scp->cmnd)[2]), >> get_unaligned_be32(&((u32 *)scp->cmnd)[3])); >> >> - rcr = send_tmf(afu, scp, TMF_LUN_RESET); >> - if (unlikely(rcr)) >> + switch (cfg->state) { >> + case STATE_NORMAL: >> + rcr = send_tmf(afu, scp, TMF_LUN_RESET); >> + if (unlikely(rcr)) >> + rc = FAILED; >> + break; >> + case STATE_LIMBO: >> + wait_event(cfg->limbo_waitq, cfg->state != STATE_LIMBO); >> + if (cfg->state == STATE_NORMAL) >> + break; > > In this case you've been asked to do a LUN reset but didn't actually reset the LUN. I'd suggest > restructuring this switch statement to send the LUN reset TMF in the case where you had > to wait for an AFU reset to complete. > >> + /* fall through */ >> + default: >> rc = FAILED; >> + break; >> + } >> >> pr_debug("%s: returning rc=%d\n", __func__, rc); >> return rc; >> @@ -487,11 +515,29 @@ static int cxlflash_eh_host_reset_handler(struct scsi_cmnd *scp) >> get_unaligned_be32(&((u32 *)scp->cmnd)[2]), >> get_unaligned_be32(&((u32 *)scp->cmnd)[3])); >> >> - rcr = cxlflash_afu_reset(cfg); >> - if (rcr == 0) >> - rc = SUCCESS; >> - else >> + switch (cfg->state) { >> + case STATE_NORMAL: >> + cfg->state = STATE_LIMBO; >> + scsi_block_requests(cfg->host); >> + >> + rcr = cxlflash_afu_reset(cfg); >> + if (rcr) { >> + rc = FAILED; >> + cfg->state = STATE_FAILTERM; >> + } else >> + cfg->state = STATE_NORMAL; >> + wake_up_all(&cfg->limbo_waitq); >> + scsi_unblock_requests(cfg->host); > > The scsi_block_requests / scsi_unblock_requests is not necessary in this path, since SCSI EH > will already be preventing any new commands being issued via queuecommand. > >> + break; >> + case STATE_LIMBO: >> + wait_event(cfg->limbo_waitq, cfg->state != STATE_LIMBO); >> + if (cfg->state == STATE_NORMAL) >> + break; >> + /* fall through */ >> + default: >> rc = FAILED; >> + break; >> + } >> >> pr_debug("%s: returning rc=%d\n", __func__, rc); >> return rc; >> @@ -642,7 +688,7 @@ static void cxlflash_wait_for_pci_err_recovery(struct cxlflash_cfg *cfg) >> struct pci_dev *pdev = cfg->dev; >> >> if (pci_channel_offline(pdev)) >> - wait_event_timeout(cfg->eeh_waitq, >> + wait_event_timeout(cfg->limbo_waitq, >> !pci_channel_offline(pdev), >> CXLFLASH_PCI_ERROR_RECOVERY_TIMEOUT); >> } >> @@ -825,6 +871,8 @@ static void cxlflash_remove(struct pci_dev *pdev) >> !cfg->tmf_active); >> spin_unlock_irqrestore(&cfg->tmf_waitq.lock, lock_flags); >> >> + cfg->state = STATE_FAILTERM; > > I don't see any locking around the setting or reading of this flag. What are the > implications if the processor reorders the store to change this state either here > or elsewhere. Same goes for the load associated with checking the state. > >> + >> switch (cfg->init_state) { >> case INIT_STATE_SCSI: >> scsi_remove_host(cfg->host); >> @@ -1879,6 +1927,8 @@ static int init_afu(struct cxlflash_cfg *cfg) >> struct afu *afu = cfg->afu; >> struct device *dev = &cfg->dev->dev; >> >> + cxl_perst_reloads_same_image(cfg->cxl_afu, true); >> + >> rc = init_mc(cfg); >> if (rc) { >> dev_err(dev, "%s: call to init_mc failed, rc=%d!\n", > > Reviewed-by: Brian King <brking@xxxxxxxxxxxxxxxxxx> > > -- > Brian King > Power Linux I/O > IBM Linux Technology Center > > -- To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html