Ping? Ack on the ata changes? I'd like to submit this to scsi-rc-fixes for 3.4-rc to fix regression fallout around host_eh_scheduled handling now that libsas ata eh is asynchronous. On Sat, Mar 10, 2012 at 8:39 PM, Dan Williams <dan.j.williams@xxxxxxxxx> wrote: > When managing shost->host_eh_scheduled libata assumes that there is a > 1:1 shost-to-ata_port relationship. libsas creates a 1:N relationship > so it needs to manage host_eh_scheduled cumulatively at the host level. > The sched_eh and end_eh port port ops allow libsas to track when domain > devices enter/leave the "eh-pending" state under ha->lock (previously > named ha->state_lock, but it is no longer just a lock for ha->state > changes). > > Since host_eh_scheduled indicates eh without backing commands pinning > the device it can be deallocated at any time. Move the taking of the > domain_device reference under the port_lock to guarantee that the > ata_port stays around for the duration of eh. > > Cc: Tejun Heo <tj@xxxxxxxxxx> > Acked-by: Jacek Danecki <jacek.danecki@xxxxxxxxx> > Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx> > --- > drivers/ata/libata-core.c | 4 ++ > drivers/ata/libata-eh.c | 57 ++++++++++++++++++++++++++++------- > drivers/scsi/libsas/sas_ata.c | 38 +++++++++++++++++++++-- > drivers/scsi/libsas/sas_discover.c | 6 ++-- > drivers/scsi/libsas/sas_event.c | 12 ++++--- > drivers/scsi/libsas/sas_init.c | 14 ++++----- > drivers/scsi/libsas/sas_scsi_host.c | 27 +++++++++++++---- > include/linux/libata.h | 4 ++ > include/scsi/libsas.h | 4 ++ > include/scsi/sas_ata.h | 5 +++ > 10 files changed, 134 insertions(+), 37 deletions(-) > > diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c > index 1654c94..827881e 100644 > --- a/drivers/ata/libata-core.c > +++ b/drivers/ata/libata-core.c > @@ -79,6 +79,8 @@ const struct ata_port_operations ata_base_port_ops = { > .prereset = ata_std_prereset, > .postreset = ata_std_postreset, > .error_handler = ata_std_error_handler, > + .sched_eh = ata_std_sched_eh, > + .end_eh = ata_std_end_eh, > }; > > const struct ata_port_operations sata_port_ops = { > @@ -6573,6 +6575,8 @@ struct ata_port_operations ata_dummy_port_ops = { > .qc_prep = ata_noop_qc_prep, > .qc_issue = ata_dummy_qc_issue, > .error_handler = ata_dummy_error_handler, > + .sched_eh = ata_std_sched_eh, > + .end_eh = ata_std_end_eh, > }; > > const struct ata_port_info ata_dummy_port_info = { > diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c > index c61316e..4f12f63 100644 > --- a/drivers/ata/libata-eh.c > +++ b/drivers/ata/libata-eh.c > @@ -793,12 +793,12 @@ void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap) > ata_for_each_link(link, ap, HOST_FIRST) > memset(&link->eh_info, 0, sizeof(link->eh_info)); > > - /* Clear host_eh_scheduled while holding ap->lock such > - * that if exception occurs after this point but > - * before EH completion, SCSI midlayer will > + /* end eh (clear host_eh_scheduled) while holding > + * ap->lock such that if exception occurs after this > + * point but before EH completion, SCSI midlayer will > * re-initiate EH. > */ > - host->host_eh_scheduled = 0; > + ap->ops->end_eh(ap); > > spin_unlock_irqrestore(ap->lock, flags); > ata_eh_release(ap); > @@ -986,16 +986,13 @@ void ata_qc_schedule_eh(struct ata_queued_cmd *qc) > } > > /** > - * ata_port_schedule_eh - schedule error handling without a qc > - * @ap: ATA port to schedule EH for > - * > - * Schedule error handling for @ap. EH will kick in as soon as > - * all commands are drained. > + * ata_std_sched_eh - non-libsas ata_ports issue eh with this common routine > + * @ap: ATA port to schedule EH for > * > - * LOCKING: > + * LOCKING: inherited from ata_port_schedule_eh > * spin_lock_irqsave(host lock) > */ > -void ata_port_schedule_eh(struct ata_port *ap) > +void ata_std_sched_eh(struct ata_port *ap) > { > WARN_ON(!ap->ops->error_handler); > > @@ -1007,6 +1004,44 @@ void ata_port_schedule_eh(struct ata_port *ap) > > DPRINTK("port EH scheduled\n"); > } > +EXPORT_SYMBOL_GPL(ata_std_sched_eh); > + > +/** > + * ata_std_end_eh - non-libsas ata_ports complete eh with this common routine > + * @ap: ATA port to end EH for > + * > + * In the libata object model there is a 1:1 mapping of ata_port to > + * shost, so host fields can be directly manipulated under ap->lock, in > + * the libsas case we need to hold a lock at the ha->level to coordinate > + * these events. > + * > + * LOCKING: > + * spin_lock_irqsave(host lock) > + */ > +void ata_std_end_eh(struct ata_port *ap) > +{ > + struct Scsi_Host *host = ap->scsi_host; > + > + host->host_eh_scheduled = 0; > +} > +EXPORT_SYMBOL(ata_std_end_eh); > + > + > +/** > + * ata_port_schedule_eh - schedule error handling without a qc > + * @ap: ATA port to schedule EH for > + * > + * Schedule error handling for @ap. EH will kick in as soon as > + * all commands are drained. > + * > + * LOCKING: > + * spin_lock_irqsave(host lock) > + */ > +void ata_port_schedule_eh(struct ata_port *ap) > +{ > + /* see: ata_std_sched_eh, unless you know better */ > + ap->ops->sched_eh(ap); > +} > > static int ata_do_link_abort(struct ata_port *ap, struct ata_link *link) > { > diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c > index 729a7b6..d30a093 100644 > --- a/drivers/scsi/libsas/sas_ata.c > +++ b/drivers/scsi/libsas/sas_ata.c > @@ -523,6 +523,31 @@ static void sas_ata_set_dmamode(struct ata_port *ap, struct ata_device *ata_dev) > i->dft->lldd_ata_set_dmamode(dev); > } > > +static void sas_ata_sched_eh(struct ata_port *ap) > +{ > + struct domain_device *dev = ap->private_data; > + struct sas_ha_struct *ha = dev->port->ha; > + unsigned long flags; > + > + spin_lock_irqsave(&ha->lock, flags); > + if (!test_and_set_bit(SAS_DEV_EH_PENDING, &dev->state)) > + ha->eh_active++; > + ata_std_sched_eh(ap); > + spin_unlock_irqrestore(&ha->lock, flags); > +} > + > +void sas_ata_end_eh(struct ata_port *ap) > +{ > + struct domain_device *dev = ap->private_data; > + struct sas_ha_struct *ha = dev->port->ha; > + unsigned long flags; > + > + spin_lock_irqsave(&ha->lock, flags); > + if (test_and_clear_bit(SAS_DEV_EH_PENDING, &dev->state)) > + ha->eh_active--; > + spin_unlock_irqrestore(&ha->lock, flags); > +} > + > static struct ata_port_operations sas_sata_ops = { > .prereset = ata_std_prereset, > .hardreset = sas_ata_hard_reset, > @@ -536,6 +561,8 @@ static struct ata_port_operations sas_sata_ops = { > .port_start = ata_sas_port_start, > .port_stop = ata_sas_port_stop, > .set_dmamode = sas_ata_set_dmamode, > + .sched_eh = sas_ata_sched_eh, > + .end_eh = sas_ata_end_eh, > }; > > static struct ata_port_info sata_port_info = { > @@ -684,10 +711,6 @@ static void async_sas_ata_eh(void *data, async_cookie_t cookie) > struct ata_port *ap = dev->sata_dev.ap; > struct sas_ha_struct *ha = dev->port->ha; > > - /* hold a reference over eh since we may be racing with final > - * remove once all commands are completed > - */ > - kref_get(&dev->kref); > sas_ata_printk(KERN_DEBUG, dev, "dev error handler\n"); > ata_scsi_port_error_handler(ha->core.shost, ap); > sas_put_device(dev); > @@ -730,6 +753,13 @@ void sas_ata_strategy_handler(struct Scsi_Host *shost) > list_for_each_entry(dev, &port->dev_list, dev_list_node) { > if (!sas_ata_dev_eh_valid(dev)) > continue; > + > + /* hold a reference over eh since we may be > + * racing with final remove once all commands > + * are completed > + */ > + kref_get(&dev->kref); > + > async_schedule_domain(async_sas_ata_eh, dev, &async); > } > spin_unlock(&port->dev_list_lock); > diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c > index bd8c636..a730ac9 100644 > --- a/drivers/scsi/libsas/sas_discover.c > +++ b/drivers/scsi/libsas/sas_discover.c > @@ -282,6 +282,8 @@ static void sas_unregister_common_dev(struct asd_sas_port *port, struct domain_d > > spin_lock_irq(&port->dev_list_lock); > list_del_init(&dev->dev_list_node); > + if (dev_is_sata(dev)) > + sas_ata_end_eh(dev->sata_dev.ap); > spin_unlock_irq(&port->dev_list_lock); > > sas_put_device(dev); > @@ -479,9 +481,9 @@ static void sas_chain_event(int event, unsigned long *pending, > if (!test_and_set_bit(event, pending)) { > unsigned long flags; > > - spin_lock_irqsave(&ha->state_lock, flags); > + spin_lock_irqsave(&ha->lock, flags); > sas_chain_work(ha, sw); > - spin_unlock_irqrestore(&ha->state_lock, flags); > + spin_unlock_irqrestore(&ha->lock, flags); > } > } > > diff --git a/drivers/scsi/libsas/sas_event.c b/drivers/scsi/libsas/sas_event.c > index 4e4292d..789c4d8 100644 > --- a/drivers/scsi/libsas/sas_event.c > +++ b/drivers/scsi/libsas/sas_event.c > @@ -47,9 +47,9 @@ static void sas_queue_event(int event, unsigned long *pending, > if (!test_and_set_bit(event, pending)) { > unsigned long flags; > > - spin_lock_irqsave(&ha->state_lock, flags); > + spin_lock_irqsave(&ha->lock, flags); > sas_queue_work(ha, work); > - spin_unlock_irqrestore(&ha->state_lock, flags); > + spin_unlock_irqrestore(&ha->lock, flags); > } > } > > @@ -61,18 +61,18 @@ void __sas_drain_work(struct sas_ha_struct *ha) > > set_bit(SAS_HA_DRAINING, &ha->state); > /* flush submitters */ > - spin_lock_irq(&ha->state_lock); > - spin_unlock_irq(&ha->state_lock); > + spin_lock_irq(&ha->lock); > + spin_unlock_irq(&ha->lock); > > drain_workqueue(wq); > > - spin_lock_irq(&ha->state_lock); > + spin_lock_irq(&ha->lock); > clear_bit(SAS_HA_DRAINING, &ha->state); > list_for_each_entry_safe(sw, _sw, &ha->defer_q, drain_node) { > list_del_init(&sw->drain_node); > sas_queue_work(ha, sw); > } > - spin_unlock_irq(&ha->state_lock); > + spin_unlock_irq(&ha->lock); > } > > int sas_drain_work(struct sas_ha_struct *ha) > diff --git a/drivers/scsi/libsas/sas_init.c b/drivers/scsi/libsas/sas_init.c > index 10cb5ae..6909fef 100644 > --- a/drivers/scsi/libsas/sas_init.c > +++ b/drivers/scsi/libsas/sas_init.c > @@ -114,7 +114,7 @@ int sas_register_ha(struct sas_ha_struct *sas_ha) > sas_ha->lldd_queue_size = 128; /* Sanity */ > > set_bit(SAS_HA_REGISTERED, &sas_ha->state); > - spin_lock_init(&sas_ha->state_lock); > + spin_lock_init(&sas_ha->lock); > mutex_init(&sas_ha->drain_mutex); > INIT_LIST_HEAD(&sas_ha->defer_q); > > @@ -163,9 +163,9 @@ int sas_unregister_ha(struct sas_ha_struct *sas_ha) > * events to be queued, and flush any in-progress drainers > */ > mutex_lock(&sas_ha->drain_mutex); > - spin_lock_irq(&sas_ha->state_lock); > + spin_lock_irq(&sas_ha->lock); > clear_bit(SAS_HA_REGISTERED, &sas_ha->state); > - spin_unlock_irq(&sas_ha->state_lock); > + spin_unlock_irq(&sas_ha->lock); > __sas_drain_work(sas_ha); > mutex_unlock(&sas_ha->drain_mutex); > > @@ -411,9 +411,9 @@ static int queue_phy_reset(struct sas_phy *phy, int hard_reset) > d->reset_result = 0; > d->hard_reset = hard_reset; > > - spin_lock_irq(&ha->state_lock); > + spin_lock_irq(&ha->lock); > sas_queue_work(ha, &d->reset_work); > - spin_unlock_irq(&ha->state_lock); > + spin_unlock_irq(&ha->lock); > > rc = sas_drain_work(ha); > if (rc == 0) > @@ -438,9 +438,9 @@ static int queue_phy_enable(struct sas_phy *phy, int enable) > d->enable_result = 0; > d->enable = enable; > > - spin_lock_irq(&ha->state_lock); > + spin_lock_irq(&ha->lock); > sas_queue_work(ha, &d->enable_work); > - spin_unlock_irq(&ha->state_lock); > + spin_unlock_irq(&ha->lock); > > rc = sas_drain_work(ha); > if (rc == 0) > diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c > index 49a9113..cd63d80 100644 > --- a/drivers/scsi/libsas/sas_scsi_host.c > +++ b/drivers/scsi/libsas/sas_scsi_host.c > @@ -667,16 +667,20 @@ static void sas_eh_handle_sas_errors(struct Scsi_Host *shost, struct list_head * > goto out; > } > > + > void sas_scsi_recover_host(struct Scsi_Host *shost) > { > struct sas_ha_struct *ha = SHOST_TO_SAS_HA(shost); > - unsigned long flags; > LIST_HEAD(eh_work_q); > + int tries = 0; > + bool retry; > > - spin_lock_irqsave(shost->host_lock, flags); > +retry: > + tries++; > + retry = true; > + spin_lock_irq(shost->host_lock); > list_splice_init(&shost->eh_cmd_q, &eh_work_q); > - shost->host_eh_scheduled = 0; > - spin_unlock_irqrestore(shost->host_lock, flags); > + spin_unlock_irq(shost->host_lock); > > SAS_DPRINTK("Enter %s busy: %d failed: %d\n", > __func__, shost->host_busy, shost->host_failed); > @@ -710,8 +714,19 @@ out: > > scsi_eh_flush_done_q(&ha->eh_done_q); > > - SAS_DPRINTK("--- Exit %s: busy: %d failed: %d\n", > - __func__, shost->host_busy, shost->host_failed); > + /* check if any new eh work was scheduled during the last run */ > + spin_lock_irq(&ha->lock); > + if (ha->eh_active == 0) { > + shost->host_eh_scheduled = 0; > + retry = false; > + } > + spin_unlock_irq(&ha->lock); > + > + if (retry) > + goto retry; > + > + SAS_DPRINTK("--- Exit %s: busy: %d failed: %d tries: %d\n", > + __func__, shost->host_busy, shost->host_failed, tries); > } > > enum blk_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *cmd) > diff --git a/include/linux/libata.h b/include/linux/libata.h > index 42378d6..83ff256 100644 > --- a/include/linux/libata.h > +++ b/include/linux/libata.h > @@ -845,6 +845,8 @@ struct ata_port_operations { > void (*error_handler)(struct ata_port *ap); > void (*lost_interrupt)(struct ata_port *ap); > void (*post_internal_cmd)(struct ata_queued_cmd *qc); > + void (*sched_eh)(struct ata_port *ap); > + void (*end_eh)(struct ata_port *ap); > > /* > * Optional features > @@ -1165,6 +1167,8 @@ extern void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset, > ata_reset_fn_t softreset, ata_reset_fn_t hardreset, > ata_postreset_fn_t postreset); > extern void ata_std_error_handler(struct ata_port *ap); > +extern void ata_std_sched_eh(struct ata_port *ap); > +extern void ata_std_end_eh(struct ata_port *ap); > extern int ata_link_nr_enabled(struct ata_link *link); > > /* > diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h > index f4f1c96..c0c94b3 100644 > --- a/include/scsi/libsas.h > +++ b/include/scsi/libsas.h > @@ -177,6 +177,7 @@ struct sata_device { > enum { > SAS_DEV_GONE, > SAS_DEV_DESTROY, > + SAS_DEV_EH_PENDING, > }; > > struct domain_device { > @@ -384,7 +385,8 @@ struct sas_ha_struct { > struct list_head defer_q; /* work queued while draining */ > struct mutex drain_mutex; > unsigned long state; > - spinlock_t state_lock; > + spinlock_t lock; > + int eh_active; > > struct mutex disco_mutex; > > diff --git a/include/scsi/sas_ata.h b/include/scsi/sas_ata.h > index 6d5d60c..3e13c28 100644 > --- a/include/scsi/sas_ata.h > +++ b/include/scsi/sas_ata.h > @@ -44,6 +44,7 @@ void sas_ata_eh(struct Scsi_Host *shost, struct list_head *work_q, > void sas_ata_schedule_reset(struct domain_device *dev); > void sas_ata_wait_eh(struct domain_device *dev); > void sas_probe_sata(struct asd_sas_port *port); > +void sas_ata_end_eh(struct ata_port *ap); > #else > > > @@ -81,6 +82,10 @@ static inline int sas_get_ata_info(struct domain_device *dev, struct ex_phy *phy > { > return 0; > } > + > +static inline void sas_ata_end_eh(struct ata_port *ap) > +{ > +} > #endif > > #endif /* _SAS_ATA_H_ */ > > -- > To unsubscribe from this list: send the line "unsubscribe linux-scsi" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html