On 09/16/2015 04:27 PM, Matthew R. Ochs wrote: > --- a/drivers/scsi/cxlflash/main.c > +++ b/drivers/scsi/cxlflash/main.c > @@ -2311,6 +2311,7 @@ static int cxlflash_probe(struct pci_dev *pdev, > cfg->lr_port = -1; > mutex_init(&cfg->ctx_tbl_list_mutex); > mutex_init(&cfg->ctx_recovery_mutex); > + init_rwsem(&cfg->ioctl_rwsem); > INIT_LIST_HEAD(&cfg->ctx_err_recovery); > INIT_LIST_HEAD(&cfg->lluns); > > @@ -2365,6 +2366,19 @@ out_remove: > } > > /** > + * drain_ioctls() - wait until all currently executing ioctls have completed > + * @cfg: Internal structure associated with the host. > + * > + * Obtain write access to read/write semaphore that wraps ioctl > + * handling to 'drain' ioctls currently executing. > + */ > +static void drain_ioctls(struct cxlflash_cfg *cfg) > +{ > + down_write(&cfg->ioctl_rwsem); > + up_write(&cfg->ioctl_rwsem); > +} > + > +/** > * cxlflash_pci_error_detected() - called when a PCI error is detected > * @pdev: PCI device struct. > * @state: PCI channel state. > @@ -2383,16 +2397,14 @@ static pci_ers_result_t cxlflash_pci_error_detected(struct pci_dev *pdev, > switch (state) { > case pci_channel_io_frozen: > cfg->state = STATE_LIMBO; > - > - /* Turn off legacy I/O */ > scsi_block_requests(cfg->host); > + drain_ioctls(cfg); So, what kicks any outstanding ioctls back? Let's assume you are in the middle of disk_attach and you've sent the READ_CAP16 to the device. It appears as if what would happen here is we'd sit here in cxlflash_pci_error_detected. Eventually, the READ_CAP16 would timeout. This would wake the SCSI error handler, and end up calling your eh_device_reset handler, which would see that we are in STATE_LIMBO, where it would then do a wait_event, waiting for us to get out of STATE_LIMBO, and we would end up in a deadlock. Rather than implementing a rw semaphore, would it be better to simply make the ioctls check the state we are in and either wait to get out of EEH state or fail themselves? > rc = cxlflash_mark_contexts_error(cfg); > if (unlikely(rc)) > dev_err(dev, "%s: Failed to mark user contexts!(%d)\n", > __func__, rc); > term_mc(cfg, UNDO_START); > stop_afu(cfg); > - > return PCI_ERS_RESULT_NEED_RESET; > case pci_channel_io_perm_failure: > cfg->state = STATE_FAILTERM; > diff --git a/drivers/scsi/cxlflash/superpipe.c b/drivers/scsi/cxlflash/superpipe.c > index cf2a85d..8a18230 100644 > --- a/drivers/scsi/cxlflash/superpipe.c > +++ b/drivers/scsi/cxlflash/superpipe.c > @@ -1214,6 +1214,48 @@ static const struct file_operations null_fops = { > }; > > /** > + * check_state() - checks and responds to the current adapter state > + * @cfg: Internal structure associated with the host. > + * @ioctl: Indicates if on an ioctl thread. > + * > + * This routine can block and should only be used on process context. > + * When blocking on an ioctl thread, the ioctl read semaphore should be > + * let up to allow for draining actively running ioctls. Also note that > + * when waking up from waiting in reset, the state is unknown and must > + * be checked again before proceeding. > + * > + * Return: 0 on success, -errno on failure > + */ > +static int check_state(struct cxlflash_cfg *cfg, bool ioctl) All your callers appear to set the second parameter to true, so why bother having it? > +{ > + struct device *dev = &cfg->dev->dev; > + int rc = 0; > + > +retry: > + switch (cfg->state) { > + case STATE_LIMBO: > + dev_dbg(dev, "%s: Limbo state, going to wait...\n", __func__); > + if (ioctl) > + up_read(&cfg->ioctl_rwsem); > + rc = wait_event_interruptible(cfg->limbo_waitq, > + cfg->state != STATE_LIMBO); > + if (ioctl) > + down_read(&cfg->ioctl_rwsem); > + if (unlikely(rc)) > + break; > + goto retry; > + case STATE_FAILTERM: > + dev_dbg(dev, "%s: Failed/Terminating!\n", __func__); > + rc = -ENODEV; > + break; > + default: > + break; > + } > + > + return rc; > +} > + > +/** > * cxlflash_disk_attach() - attach a LUN to a context > * @sdev: SCSI device associated with LUN. > * @attach: Attach ioctl data structure. -- Brian King Power Linux I/O IBM Linux Technology Center -- To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html