The latest version of the patch kit is available here: git://git.kernel.org/pub/scm/linux/kernel/git/djbw/isci.git libsas-eh-reworks-v4 Changes since v3: http://marc.info/?l=linux-scsi&m=132581455214789&w=2 1/ null pointer regression smp_execute_task: make sure device is not removed from the domain while eh is in flight. Fix up the logic that defers domain_device destruction to its own context. Folded into: "libsas: prevent domain rediscovery competing with ata error handling" plus a small prep patch: "libsas: convert dev->gone to flags" 2/ lockdep regression in smp_execute_task: failed to release the lock before returning. Folded into: "libsas: check for 'gone' expanders in smp_execute_task()" 3/ unnecessary locking in isci_ata_check_ready(). Folded into: "isci: ->lldd_ata_check_ready handler" 4/ two new fixes follow in separate mails [PATCH v4 1/2] libsas: pre-clean commands that won the eh vs completion race [PATCH v4 2/2] libsas: feed the scsi_block_when_processing_errors() meter Incremental diff: libsas-eh-reworks-v3..libsas-eh-reworks-v4 diff --git a/drivers/scsi/isci/port.c b/drivers/scsi/isci/port.c index e795645..eca9ba0 100644 --- a/drivers/scsi/isci/port.c +++ b/drivers/scsi/isci/port.c @@ -1681,10 +1681,7 @@ int isci_ata_check_ready(struct domain_device *dev) if (test_bit(IPORT_RESET_PENDING, &iport->state)) goto out; - /* snapshot active phy mask */ - spin_lock_irqsave(&ihost->scic_lock, flags); rc = !!iport->active_phy_mask; - spin_unlock_irqrestore(&ihost->scic_lock, flags); out: isci_put_device(idev); diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index f90fdcf..a062adc 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -195,7 +195,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc) spin_unlock(ap->lock); /* If the device fell off, no sense in issuing commands */ - if (dev->gone) + if (test_bit(SAS_DEV_GONE, &dev->state)) goto out; task = sas_alloc_task(GFP_ATOMIC); @@ -327,7 +327,7 @@ static int sas_ata_hard_reset(struct ata_link *link, unsigned int *class, struct domain_device *dev = ap->private_data; struct sas_internal *i = dev_to_sas_internal(dev); - if (dev->gone) + if (test_bit(SAS_DEV_GONE, &dev->state)) return -ENODEV; res = i->dft->lldd_I_T_nexus_reset(dev); @@ -663,6 +663,11 @@ static void async_sas_ata_eh(void *data, async_cookie_t cookie) ata_port_printk(ap, KERN_DEBUG, "sas eh calling libata port error handler"); ata_scsi_port_error_handler(ha->core.shost, ap); + + /* tell scsi_block_when_processing_errors() waiters that we are + * still making forward progress + */ + wake_up(&ha->core.shost->host_wait); } void sas_ata_strategy_handler(struct Scsi_Host *shost) diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c index 0b7555d..dfb0250 100644 --- a/drivers/scsi/libsas/sas_discover.c +++ b/drivers/scsi/libsas/sas_discover.c @@ -269,17 +269,22 @@ static void sas_destruct_devices(struct work_struct *work) clear_bit(DISCE_DESTRUCT, &port->disc.pending); - list_for_each_entry_safe(dev, n, &port->destroy_list, dev_list_node) { + list_for_each_entry_safe(dev, n, &port->destroy_list, disco_list_node) { + list_del_init(&dev->disco_list_node); + sas_remove_children(&dev->rphy->dev); sas_rphy_delete(dev->rphy); dev->rphy = NULL; sas_unregister_common_dev(port, dev); + + sas_put_device(dev); } } void sas_unregister_dev(struct asd_sas_port *port, struct domain_device *dev) { - if (!list_empty(&dev->disco_list_node)) { + if (!test_bit(SAS_DEV_DESTROY, &dev->state) && + !list_empty(&dev->disco_list_node)) { /* this rphy never saw sas_rphy_add */ list_del_init(&dev->disco_list_node); sas_rphy_free(dev->rphy); @@ -287,13 +292,9 @@ void sas_unregister_dev(struct asd_sas_port *port, struct domain_device *dev) sas_unregister_common_dev(port, dev); } - if (dev->rphy) { + if (dev->rphy && !test_and_set_bit(SAS_DEV_DESTROY, &dev->state)) { sas_rphy_unlink(dev->rphy); - - spin_lock_irq(&port->dev_list_lock); - list_move_tail(&dev->dev_list_node, &port->destroy_list); - spin_unlock_irq(&port->dev_list_lock); - + list_move_tail(&dev->disco_list_node, &port->destroy_list); sas_discover_event(dev->port, DISCE_DESTRUCT); } } diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index e47599b..68a80a0 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -74,8 +74,10 @@ static int smp_execute_task(struct domain_device *dev, void *req, int req_size, mutex_lock(&dev->ex_dev.cmd_mutex); for (retry = 0; retry < 3; retry++) { - if (dev->gone) - return -ECOMM; + if (test_bit(SAS_DEV_GONE, &dev->state)) { + res = -ECOMM; + break; + } task = sas_alloc_task(GFP_KERNEL); if (!task) { @@ -1794,7 +1796,7 @@ static void sas_unregister_ex_tree(struct asd_sas_port *port, struct domain_devi struct domain_device *child, *n; list_for_each_entry_safe(child, n, &ex->children, siblings) { - child->gone = 1; + set_bit(SAS_DEV_GONE, &child->state); if (child->dev_type == EDGE_DEV || child->dev_type == FANOUT_DEV) sas_unregister_ex_tree(port, child); @@ -1815,7 +1817,7 @@ static void sas_unregister_devs_sas_addr(struct domain_device *parent, &ex_dev->children, siblings) { if (SAS_ADDR(child->sas_addr) == SAS_ADDR(phy->attached_sas_addr)) { - child->gone = 1; + set_bit(SAS_DEV_GONE, &child->state); if (child->dev_type == EDGE_DEV || child->dev_type == FANOUT_DEV) sas_unregister_ex_tree(parent->port, child); diff --git a/drivers/scsi/libsas/sas_port.c b/drivers/scsi/libsas/sas_port.c index 36e2905..31adcd1 100644 --- a/drivers/scsi/libsas/sas_port.c +++ b/drivers/scsi/libsas/sas_port.c @@ -168,7 +168,7 @@ void sas_deform_port(struct asd_sas_phy *phy, int gone) if (port->num_phys == 1) { if (dev && gone) - dev->gone = 1; + set_bit(SAS_DEV_GONE, &dev->state); sas_unregister_domain_devices(port); sas_port_delete(port->port); port->port = NULL; diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index 59a227d..731c892 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -208,7 +208,7 @@ int sas_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) int res = 0; /* If the device fell off, no sense in issuing commands */ - if (dev->gone) { + if (test_bit(SAS_DEV_GONE, &dev->state)) { cmd->result = DID_BAD_TARGET << 16; goto out_done; } @@ -249,8 +249,8 @@ out_done: static void sas_eh_finish_cmd(struct scsi_cmnd *cmd) { - struct sas_task *task = TO_SAS_TASK(cmd); struct sas_ha_struct *sas_ha = SHOST_TO_SAS_HA(cmd->device->host); + struct sas_task *task = TO_SAS_TASK(cmd); /* At this point, we only get called following an actual abort * of the task, so we should be guaranteed not to be racing with @@ -267,9 +267,9 @@ static void sas_eh_finish_cmd(struct scsi_cmnd *cmd) static void sas_eh_defer_cmd(struct scsi_cmnd *cmd) { - struct sas_task *task = TO_SAS_TASK(cmd); - struct domain_device *dev = task->dev; + struct domain_device *dev = cmd_to_domain_dev(cmd); struct sas_ha_struct *ha = dev->port->ha; + struct sas_task *task = TO_SAS_TASK(cmd); if (!dev_is_sata(dev)) { sas_eh_finish_cmd(cmd); @@ -530,8 +530,9 @@ static int sas_eh_handle_sas_errors(struct Scsi_Host *shost, struct sas_internal *i = to_sas_internal(shost->transportt); unsigned long flags; struct sas_ha_struct *ha = SHOST_TO_SAS_HA(shost); + LIST_HEAD(done); -Again: + /* clean out any commands that won the completion vs eh race */ list_for_each_entry_safe(cmd, n, work_q, eh_entry) { struct domain_device *dev = cmd_to_domain_dev(cmd); struct sas_task *task; @@ -545,7 +546,12 @@ Again: spin_unlock_irqrestore(&dev->done_lock, flags); if (!task) - continue; + list_move_tail(&cmd->eh_entry, &done); + } + + Again: + list_for_each_entry_safe(cmd, n, work_q, eh_entry) { + struct sas_task *task = TO_SAS_TASK(cmd); list_del_init(&cmd->eh_entry); @@ -649,15 +655,16 @@ Again: goto clear_q; } } + out: + list_splice_tail(&done, work_q); list_splice_tail_init(&ha->eh_ata_q, work_q); return list_empty(work_q); -clear_q: + + clear_q: SAS_DPRINTK("--- Exit %s -- clear_q\n", __func__); list_for_each_entry_safe(cmd, n, work_q, eh_entry) sas_eh_finish_cmd(cmd); - - list_splice_tail_init(&ha->eh_ata_q, work_q); - return list_empty(work_q); + goto out; } void sas_scsi_recover_host(struct Scsi_Host *shost) diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index feb61a8..55bab86 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -174,7 +174,11 @@ struct sata_device { struct ata_taskfile tf; }; -/* ---------- Domain device ---------- */ +enum { + SAS_DEV_GONE, + SAS_DEV_DESTROY, +}; + struct domain_device { spinlock_t done_lock; enum sas_dev_type dev_type; @@ -191,7 +195,7 @@ struct domain_device { struct sas_phy *phy; struct list_head dev_list_node; - struct list_head disco_list_node; + struct list_head disco_list_node; /* awaiting probe or destruct */ enum sas_protocol iproto; enum sas_protocol tproto; @@ -209,7 +213,7 @@ struct domain_device { }; void *lldd_dev; - int gone; + unsigned long state; struct kref kref; }; The following changes since commit 5c41dc3a79150e93e5d050871a10b761be8281a1: [SCSI] lpfc 8.3.28: Update driver version to 8.3.28 (2011-12-15 10:57:45 +0400) are available in the git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/djbw/isci.git libsas-eh-reworks-v4 Dan Williams (36): libsas: remove unused ata_task_resp fields libsas: kill sas_slave_destroy libsas: fix domain_device leak libsas: fix leak of dev->sata_dev.identify_[packet_]device libsas: replace event locks with atomic bitops libsas: convert ha->state to flags libsas: introduce sas_drain_work() libsas: remove ata_port.lock management duties from lldds libsas: convert dev->gone to flags libsas: prevent domain rediscovery competing with ata error handling libsas: use ->set_dmamode to notify lldds of NCQ parameters libsas: kill invocation of scsi_eh_finish_cmd from sas_ata_task_done libsas: close error handling vs sas_ata_task_done() race libsas: prevent double completion of scmds from eh libsas: fix timeout vs completion race libsas: let libata handle command timeouts libsas: defer SAS_TASK_NEED_DEV_RESET commands to libata libsas: use libata-eh-reset for sata rediscovery fis transmit failures libsas: perform sas-transport resets in shost->workq context libsas: execute transport link resets with libata-eh via host workqueue libsas: sas_phy_enable via transport_sas_phy_reset libsas: async ata-eh libsas: poll for ata device readiness after reset libsas: don't mark expanders as gone when a child device is removed libsas: check for 'gone' expanders in smp_execute_task() libsas: fix sas_find_local_phy(), take phy references libsas: don't recover 'gone' devices in sas_ata_hard_reset() isci: kill iphy->isci_port lookups isci: kill isci_port->status isci: fix interpretation of "hard" reset isci: stop interpreting ->lldd_lu_reset() as an ata soft-reset isci: ->lldd_ata_check_ready handler isci: remove bus and reset handlers isci: remove IDEV_EH hack to disable "discovery-time" ata resets libsas: pre-clean commands that won the eh vs completion race libsas: feed the scsi_block_when_processing_errors() meter Jeff Skirvin (2): libsas: Remove redundant phy state notification calls. libsas: add mutex for SMP task execution Documentation/scsi/libsas.txt | 15 - drivers/ata/libata-eh.c | 1 + drivers/ata/libata.h | 1 - drivers/scsi/aic94xx/aic94xx.h | 2 + drivers/scsi/aic94xx/aic94xx_dev.c | 38 ++- drivers/scsi/aic94xx/aic94xx_init.c | 5 +- drivers/scsi/aic94xx/aic94xx_tmf.c | 9 +- drivers/scsi/isci/host.c | 8 +- drivers/scsi/isci/host.h | 19 +- drivers/scsi/isci/init.c | 13 +- drivers/scsi/isci/phy.c | 18 +- drivers/scsi/isci/phy.h | 1 - drivers/scsi/isci/port.c | 217 ++++++------ drivers/scsi/isci/port.h | 11 +- drivers/scsi/isci/remote_device.c | 32 +-- drivers/scsi/isci/remote_device.h | 7 +- drivers/scsi/isci/request.c | 198 +---------- drivers/scsi/isci/request.h | 9 +- drivers/scsi/isci/task.c | 158 ++------- drivers/scsi/isci/task.h | 40 -- drivers/scsi/libsas/sas_ata.c | 692 +++++++++++++++-------------------- drivers/scsi/libsas/sas_discover.c | 152 +++++++-- drivers/scsi/libsas/sas_event.c | 89 +++++- drivers/scsi/libsas/sas_expander.c | 113 ++++-- drivers/scsi/libsas/sas_init.c | 192 +++++++++- drivers/scsi/libsas/sas_internal.h | 73 ++-- drivers/scsi/libsas/sas_phy.c | 12 +- drivers/scsi/libsas/sas_port.c | 26 +- drivers/scsi/libsas/sas_scsi_host.c | 320 +++++++--------- drivers/scsi/mvsas/mv_init.c | 1 - drivers/scsi/mvsas/mv_sas.c | 11 +- drivers/scsi/pm8001/pm8001_init.c | 1 - drivers/scsi/pm8001/pm8001_sas.c | 29 +- drivers/scsi/scsi_transport_sas.c | 59 +++- include/linux/libata.h | 1 + include/scsi/libsas.h | 67 ++-- include/scsi/sas_ata.h | 26 +- include/scsi/scsi_transport_sas.h | 12 +- 38 files changed, 1321 insertions(+), 1357 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html