When managing shost->host_eh_scheduled libata assumes that there is a 1:1 shost-to-ata_port relationship. libsas creates a 1:N relationship so it needs to manage host_eh_scheduled cumulatively at the host level. The sched_eh and end_eh port port ops allow libsas to track when domain devices enter/leave the "eh-pending" state under ha->lock (previously named ha->state_lock, but it is no longer just a lock for ha->state changes). Since host_eh_scheduled indicates eh without backing commands pinning the device it can be deallocated at any time. Move the taking of the domain_device reference under the port_lock to guarantee that the ata_port stays around for the duration of eh. Cc: Tejun Heo <tj@xxxxxxxxxx> Acked-by: Jacek Danecki <jacek.danecki@xxxxxxxxx> Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx> --- drivers/ata/libata-core.c | 4 ++ drivers/ata/libata-eh.c | 57 ++++++++++++++++++++++++++++------- drivers/scsi/libsas/sas_ata.c | 38 +++++++++++++++++++++-- drivers/scsi/libsas/sas_discover.c | 6 ++-- drivers/scsi/libsas/sas_event.c | 12 ++++--- drivers/scsi/libsas/sas_init.c | 14 ++++----- drivers/scsi/libsas/sas_scsi_host.c | 27 +++++++++++++---- include/linux/libata.h | 4 ++ include/scsi/libsas.h | 4 ++ include/scsi/sas_ata.h | 5 +++ 10 files changed, 134 insertions(+), 37 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index e0bda9f..ea8444e 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -80,6 +80,8 @@ const struct ata_port_operations ata_base_port_ops = { .prereset = ata_std_prereset, .postreset = ata_std_postreset, .error_handler = ata_std_error_handler, + .sched_eh = ata_std_sched_eh, + .end_eh = ata_std_end_eh, }; const struct ata_port_operations sata_port_ops = { @@ -6635,6 +6637,8 @@ struct ata_port_operations ata_dummy_port_ops = { .qc_prep = ata_noop_qc_prep, .qc_issue = ata_dummy_qc_issue, .error_handler = ata_dummy_error_handler, + .sched_eh = ata_std_sched_eh, + .end_eh = ata_std_end_eh, }; const struct ata_port_info ata_dummy_port_info = { diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index c61316e..4f12f63 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -793,12 +793,12 @@ void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap) ata_for_each_link(link, ap, HOST_FIRST) memset(&link->eh_info, 0, sizeof(link->eh_info)); - /* Clear host_eh_scheduled while holding ap->lock such - * that if exception occurs after this point but - * before EH completion, SCSI midlayer will + /* end eh (clear host_eh_scheduled) while holding + * ap->lock such that if exception occurs after this + * point but before EH completion, SCSI midlayer will * re-initiate EH. */ - host->host_eh_scheduled = 0; + ap->ops->end_eh(ap); spin_unlock_irqrestore(ap->lock, flags); ata_eh_release(ap); @@ -986,16 +986,13 @@ void ata_qc_schedule_eh(struct ata_queued_cmd *qc) } /** - * ata_port_schedule_eh - schedule error handling without a qc - * @ap: ATA port to schedule EH for - * - * Schedule error handling for @ap. EH will kick in as soon as - * all commands are drained. + * ata_std_sched_eh - non-libsas ata_ports issue eh with this common routine + * @ap: ATA port to schedule EH for * - * LOCKING: + * LOCKING: inherited from ata_port_schedule_eh * spin_lock_irqsave(host lock) */ -void ata_port_schedule_eh(struct ata_port *ap) +void ata_std_sched_eh(struct ata_port *ap) { WARN_ON(!ap->ops->error_handler); @@ -1007,6 +1004,44 @@ void ata_port_schedule_eh(struct ata_port *ap) DPRINTK("port EH scheduled\n"); } +EXPORT_SYMBOL_GPL(ata_std_sched_eh); + +/** + * ata_std_end_eh - non-libsas ata_ports complete eh with this common routine + * @ap: ATA port to end EH for + * + * In the libata object model there is a 1:1 mapping of ata_port to + * shost, so host fields can be directly manipulated under ap->lock, in + * the libsas case we need to hold a lock at the ha->level to coordinate + * these events. + * + * LOCKING: + * spin_lock_irqsave(host lock) + */ +void ata_std_end_eh(struct ata_port *ap) +{ + struct Scsi_Host *host = ap->scsi_host; + + host->host_eh_scheduled = 0; +} +EXPORT_SYMBOL(ata_std_end_eh); + + +/** + * ata_port_schedule_eh - schedule error handling without a qc + * @ap: ATA port to schedule EH for + * + * Schedule error handling for @ap. EH will kick in as soon as + * all commands are drained. + * + * LOCKING: + * spin_lock_irqsave(host lock) + */ +void ata_port_schedule_eh(struct ata_port *ap) +{ + /* see: ata_std_sched_eh, unless you know better */ + ap->ops->sched_eh(ap); +} static int ata_do_link_abort(struct ata_port *ap, struct ata_link *link) { diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index bc83704..545b78d 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -523,6 +523,31 @@ static void sas_ata_set_dmamode(struct ata_port *ap, struct ata_device *ata_dev) i->dft->lldd_ata_set_dmamode(dev); } +static void sas_ata_sched_eh(struct ata_port *ap) +{ + struct domain_device *dev = ap->private_data; + struct sas_ha_struct *ha = dev->port->ha; + unsigned long flags; + + spin_lock_irqsave(&ha->lock, flags); + if (!test_and_set_bit(SAS_DEV_EH_PENDING, &dev->state)) + ha->eh_active++; + ata_std_sched_eh(ap); + spin_unlock_irqrestore(&ha->lock, flags); +} + +void sas_ata_end_eh(struct ata_port *ap) +{ + struct domain_device *dev = ap->private_data; + struct sas_ha_struct *ha = dev->port->ha; + unsigned long flags; + + spin_lock_irqsave(&ha->lock, flags); + if (test_and_clear_bit(SAS_DEV_EH_PENDING, &dev->state)) + ha->eh_active--; + spin_unlock_irqrestore(&ha->lock, flags); +} + static struct ata_port_operations sas_sata_ops = { .prereset = ata_std_prereset, .hardreset = sas_ata_hard_reset, @@ -536,6 +561,8 @@ static struct ata_port_operations sas_sata_ops = { .port_start = ata_sas_port_start, .port_stop = ata_sas_port_stop, .set_dmamode = sas_ata_set_dmamode, + .sched_eh = sas_ata_sched_eh, + .end_eh = sas_ata_end_eh, }; static struct ata_port_info sata_port_info = { @@ -708,10 +735,6 @@ static void async_sas_ata_eh(void *data, async_cookie_t cookie) struct ata_port *ap = dev->sata_dev.ap; struct sas_ha_struct *ha = dev->port->ha; - /* hold a reference over eh since we may be racing with final - * remove once all commands are completed - */ - kref_get(&dev->kref); sas_ata_printk(KERN_DEBUG, dev, "dev error handler\n"); ata_scsi_port_error_handler(ha->core.shost, ap); sas_put_device(dev); @@ -754,6 +777,13 @@ void sas_ata_strategy_handler(struct Scsi_Host *shost) list_for_each_entry(dev, &port->dev_list, dev_list_node) { if (!sas_ata_dev_eh_valid(dev)) continue; + + /* hold a reference over eh since we may be + * racing with final remove once all commands + * are completed + */ + kref_get(&dev->kref); + async_schedule_domain(async_sas_ata_eh, dev, &async); } spin_unlock(&port->dev_list_lock); diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c index c7ac882..b82949c 100644 --- a/drivers/scsi/libsas/sas_discover.c +++ b/drivers/scsi/libsas/sas_discover.c @@ -282,6 +282,8 @@ static void sas_unregister_common_dev(struct asd_sas_port *port, struct domain_d spin_lock_irq(&port->dev_list_lock); list_del_init(&dev->dev_list_node); + if (dev_is_sata(dev)) + sas_ata_end_eh(dev->sata_dev.ap); spin_unlock_irq(&port->dev_list_lock); sas_put_device(dev); @@ -479,9 +481,9 @@ static void sas_chain_event(int event, unsigned long *pending, if (!test_and_set_bit(event, pending)) { unsigned long flags; - spin_lock_irqsave(&ha->state_lock, flags); + spin_lock_irqsave(&ha->lock, flags); sas_chain_work(ha, sw); - spin_unlock_irqrestore(&ha->state_lock, flags); + spin_unlock_irqrestore(&ha->lock, flags); } } diff --git a/drivers/scsi/libsas/sas_event.c b/drivers/scsi/libsas/sas_event.c index 4e4292d..789c4d8 100644 --- a/drivers/scsi/libsas/sas_event.c +++ b/drivers/scsi/libsas/sas_event.c @@ -47,9 +47,9 @@ static void sas_queue_event(int event, unsigned long *pending, if (!test_and_set_bit(event, pending)) { unsigned long flags; - spin_lock_irqsave(&ha->state_lock, flags); + spin_lock_irqsave(&ha->lock, flags); sas_queue_work(ha, work); - spin_unlock_irqrestore(&ha->state_lock, flags); + spin_unlock_irqrestore(&ha->lock, flags); } } @@ -61,18 +61,18 @@ void __sas_drain_work(struct sas_ha_struct *ha) set_bit(SAS_HA_DRAINING, &ha->state); /* flush submitters */ - spin_lock_irq(&ha->state_lock); - spin_unlock_irq(&ha->state_lock); + spin_lock_irq(&ha->lock); + spin_unlock_irq(&ha->lock); drain_workqueue(wq); - spin_lock_irq(&ha->state_lock); + spin_lock_irq(&ha->lock); clear_bit(SAS_HA_DRAINING, &ha->state); list_for_each_entry_safe(sw, _sw, &ha->defer_q, drain_node) { list_del_init(&sw->drain_node); sas_queue_work(ha, sw); } - spin_unlock_irq(&ha->state_lock); + spin_unlock_irq(&ha->lock); } int sas_drain_work(struct sas_ha_struct *ha) diff --git a/drivers/scsi/libsas/sas_init.c b/drivers/scsi/libsas/sas_init.c index 10cb5ae..6909fef 100644 --- a/drivers/scsi/libsas/sas_init.c +++ b/drivers/scsi/libsas/sas_init.c @@ -114,7 +114,7 @@ int sas_register_ha(struct sas_ha_struct *sas_ha) sas_ha->lldd_queue_size = 128; /* Sanity */ set_bit(SAS_HA_REGISTERED, &sas_ha->state); - spin_lock_init(&sas_ha->state_lock); + spin_lock_init(&sas_ha->lock); mutex_init(&sas_ha->drain_mutex); INIT_LIST_HEAD(&sas_ha->defer_q); @@ -163,9 +163,9 @@ int sas_unregister_ha(struct sas_ha_struct *sas_ha) * events to be queued, and flush any in-progress drainers */ mutex_lock(&sas_ha->drain_mutex); - spin_lock_irq(&sas_ha->state_lock); + spin_lock_irq(&sas_ha->lock); clear_bit(SAS_HA_REGISTERED, &sas_ha->state); - spin_unlock_irq(&sas_ha->state_lock); + spin_unlock_irq(&sas_ha->lock); __sas_drain_work(sas_ha); mutex_unlock(&sas_ha->drain_mutex); @@ -411,9 +411,9 @@ static int queue_phy_reset(struct sas_phy *phy, int hard_reset) d->reset_result = 0; d->hard_reset = hard_reset; - spin_lock_irq(&ha->state_lock); + spin_lock_irq(&ha->lock); sas_queue_work(ha, &d->reset_work); - spin_unlock_irq(&ha->state_lock); + spin_unlock_irq(&ha->lock); rc = sas_drain_work(ha); if (rc == 0) @@ -438,9 +438,9 @@ static int queue_phy_enable(struct sas_phy *phy, int enable) d->enable_result = 0; d->enable = enable; - spin_lock_irq(&ha->state_lock); + spin_lock_irq(&ha->lock); sas_queue_work(ha, &d->enable_work); - spin_unlock_irq(&ha->state_lock); + spin_unlock_irq(&ha->lock); rc = sas_drain_work(ha); if (rc == 0) diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index 17339e5..52d5b01 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -667,16 +667,20 @@ static void sas_eh_handle_sas_errors(struct Scsi_Host *shost, struct list_head * goto out; } + void sas_scsi_recover_host(struct Scsi_Host *shost) { struct sas_ha_struct *ha = SHOST_TO_SAS_HA(shost); - unsigned long flags; LIST_HEAD(eh_work_q); + int tries = 0; + bool retry; - spin_lock_irqsave(shost->host_lock, flags); +retry: + tries++; + retry = true; + spin_lock_irq(shost->host_lock); list_splice_init(&shost->eh_cmd_q, &eh_work_q); - shost->host_eh_scheduled = 0; - spin_unlock_irqrestore(shost->host_lock, flags); + spin_unlock_irq(shost->host_lock); SAS_DPRINTK("Enter %s busy: %d failed: %d\n", __func__, shost->host_busy, shost->host_failed); @@ -710,8 +714,19 @@ out: scsi_eh_flush_done_q(&ha->eh_done_q); - SAS_DPRINTK("--- Exit %s: busy: %d failed: %d\n", - __func__, shost->host_busy, shost->host_failed); + /* check if any new eh work was scheduled during the last run */ + spin_lock_irq(&ha->lock); + if (ha->eh_active == 0) { + shost->host_eh_scheduled = 0; + retry = false; + } + spin_unlock_irq(&ha->lock); + + if (retry) + goto retry; + + SAS_DPRINTK("--- Exit %s: busy: %d failed: %d tries: %d\n", + __func__, shost->host_busy, shost->host_failed, tries); } enum blk_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *cmd) diff --git a/include/linux/libata.h b/include/linux/libata.h index 42378d6..83ff256 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -845,6 +845,8 @@ struct ata_port_operations { void (*error_handler)(struct ata_port *ap); void (*lost_interrupt)(struct ata_port *ap); void (*post_internal_cmd)(struct ata_queued_cmd *qc); + void (*sched_eh)(struct ata_port *ap); + void (*end_eh)(struct ata_port *ap); /* * Optional features @@ -1165,6 +1167,8 @@ extern void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset, ata_reset_fn_t softreset, ata_reset_fn_t hardreset, ata_postreset_fn_t postreset); extern void ata_std_error_handler(struct ata_port *ap); +extern void ata_std_sched_eh(struct ata_port *ap); +extern void ata_std_end_eh(struct ata_port *ap); extern int ata_link_nr_enabled(struct ata_link *link); /* diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index f4f1c96..2718b24 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -177,6 +177,7 @@ struct sata_device { enum { SAS_DEV_GONE, SAS_DEV_DESTROY, + SAS_DEV_EH_PENDING, }; struct domain_device { @@ -384,7 +385,8 @@ struct sas_ha_struct { struct list_head defer_q; /* work queued while draining */ struct mutex drain_mutex; unsigned long state; - spinlock_t state_lock; + spinlock_t lock; + int eh_active; struct mutex disco_mutex; diff --git a/include/scsi/sas_ata.h b/include/scsi/sas_ata.h index cdccd2e..cae55b6 100644 --- a/include/scsi/sas_ata.h +++ b/include/scsi/sas_ata.h @@ -45,6 +45,7 @@ void sas_ata_eh(struct Scsi_Host *shost, struct list_head *work_q, void sas_ata_schedule_reset(struct domain_device *dev); void sas_ata_wait_eh(struct domain_device *dev); void sas_probe_sata(struct asd_sas_port *port); +void sas_ata_end_eh(struct ata_port *ap); #else @@ -85,6 +86,10 @@ static inline int sas_get_ata_info(struct domain_device *dev, struct ex_phy *phy { return 0; } + +static inline void sas_ata_end_eh(struct ata_port *ap) +{ +} #endif #endif /* _SAS_ATA_H_ */ -- To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html