> > In the direct-attached case this routine returns the phy on which this > device was first discovered. Which is broken if we want to support > wide-targets, as this phy reference can become stale even though the > port is still active. > > In the expander-attached case this routine tries to lookup the phy by > scanning the attached sas addresses of the parent expander, and BUG_ONs > if it can't find it. However since eh and the libsas workqueue run > independently we can still be attempting device recovery via eh after > libsas has recorded the device as detached. This is even easier to hit > now that eh is blocked while device domain rediscovery takes place, and > that libata is fed more timed out commands increasing the chances that > it will try to recover the ata device. > > Arrange for dev->phy to always point to a last known good phy, it may be > stale after the port is torn down, but it will catch up for wide port > reconfigurations, and never be NULL. > > Q: How is pm8001_I_T_nexus_reset getting away with not performing reset > on direct attached sata devices? > [Jack Wang] We found reset may lead to some SATA disks can not be found sometime, in fact no only for direct attached sata devices. I wonder why we always reset the sata device when probe, for pm8001 direct attached sata firmware will report Initial SATA FIS when phy ready. > Cc: Jack Wang <jack_wang@xxxxxxxxx> > Cc: Xiangliang Yu <yuxiangl@xxxxxxxxxxx> > Cc: Luben Tuikov <ltuikov@xxxxxxxxx> > Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx> > --- > drivers/scsi/aic94xx/aic94xx_tmf.c | 9 ++++++-- > drivers/scsi/isci/task.c | 9 +++++--- > drivers/scsi/libsas/sas_ata.c | 7 +++++- > drivers/scsi/libsas/sas_discover.c | 24 ++++++++++++++++++++++ > drivers/scsi/libsas/sas_expander.c | 5 ++++- > drivers/scsi/libsas/sas_internal.h | 1 + > drivers/scsi/libsas/sas_port.c | 7 +++--- > drivers/scsi/libsas/sas_scsi_host.c | 38 > +++++++++++++++++------------------ > drivers/scsi/mvsas/mv_sas.c | 3 ++- > drivers/scsi/pm8001/pm8001_sas.c | 19 +++++++++++------- > drivers/scsi/scsi_transport_sas.c | 23 +++++++++++++++++++++ > include/scsi/libsas.h | 9 ++++++-- > include/scsi/scsi_transport_sas.h | 6 ++++++ > 13 files changed, 116 insertions(+), 44 deletions(-) > > diff --git a/drivers/scsi/aic94xx/aic94xx_tmf.c > b/drivers/scsi/aic94xx/aic94xx_tmf.c > index 0add73b..50b914f 100644 > --- a/drivers/scsi/aic94xx/aic94xx_tmf.c > +++ b/drivers/scsi/aic94xx/aic94xx_tmf.c > @@ -181,7 +181,7 @@ static int asd_clear_nexus_I_T(struct domain_device *dev, > int asd_I_T_nexus_reset(struct domain_device *dev) > { > int res, tmp_res, i; > - struct sas_phy *phy = sas_find_local_phy(dev); > + struct sas_phy *phy = sas_get_local_phy(dev); > /* Standard mandates link reset for ATA (type 0) and > * hard reset for SSP (type 1) */ > int reset_type = (dev->dev_type == SATA_DEV || > @@ -201,7 +201,7 @@ int asd_I_T_nexus_reset(struct domain_device *dev) > for (i = 0 ; i < 3; i++) { > tmp_res = asd_clear_nexus_I_T(dev, NEXUS_PHASE_RESUME); > if (tmp_res == TC_RESUME) > - return res; > + goto out; > msleep(500); > } > > @@ -211,7 +211,10 @@ int asd_I_T_nexus_reset(struct domain_device *dev) > dev_printk(KERN_ERR, &phy->dev, > "Failed to resume nexus after reset 0x%x\n", tmp_res); > > - return TMF_RESP_FUNC_FAILED; > + res = TMF_RESP_FUNC_FAILED; > + out: > + sas_put_local_phy(phy); > + return res; > } > > static int asd_clear_nexus_I_T_L(struct domain_device *dev, u8 *lun) > diff --git a/drivers/scsi/isci/task.c b/drivers/scsi/isci/task.c > index 5901a0e..a6ab49a 100644 > --- a/drivers/scsi/isci/task.c > +++ b/drivers/scsi/isci/task.c > @@ -1332,7 +1332,7 @@ isci_task_request_complete(struct isci_host *ihost, > static int isci_reset_device(struct isci_host *ihost, > struct isci_remote_device *idev) > { > - struct sas_phy *phy = sas_find_local_phy(idev->domain_dev); > + struct sas_phy *phy = sas_get_local_phy(idev->domain_dev); > enum sci_status status; > unsigned long flags; > int rc; > @@ -1347,8 +1347,8 @@ static int isci_reset_device(struct isci_host *ihost, > dev_dbg(&ihost->pdev->dev, > "%s: sci_remote_device_reset(%p) returned %d!\n", > __func__, idev, status); > - > - return TMF_RESP_FUNC_FAILED; > + rc = TMF_RESP_FUNC_FAILED; > + goto out; > } > spin_unlock_irqrestore(&ihost->scic_lock, flags); > > @@ -1369,7 +1369,8 @@ static int isci_reset_device(struct isci_host *ihost, > } > > dev_dbg(&ihost->pdev->dev, "%s: idev %p complete.\n", __func__, idev); > - > + out: > + sas_put_local_phy(phy); > return rc; > } > > diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c > index e174a73..96f316f 100644 > --- a/drivers/scsi/libsas/sas_ata.c > +++ b/drivers/scsi/libsas/sas_ata.c > @@ -279,9 +279,10 @@ static int smp_ata_check_ready(struct ata_link *link) > struct ata_port *ap = link->ap; > struct domain_device *dev = ap->private_data; > struct domain_device *ex_dev = dev->parent; > - struct sas_phy *phy = sas_find_local_phy(dev); > + struct sas_phy *phy = sas_get_local_phy(dev); > > res = sas_get_phy_attached_sas_addr(ex_dev, phy->number, addr); > + sas_put_local_phy(phy); > /* break the wait early if the expander is unreachable, > * otherwise keep polling > */ > @@ -314,10 +315,10 @@ static int sas_ata_hard_reset(struct ata_link *link, > unsigned int *class, > unsigned long deadline) > { > int ret = 0, res; > + struct sas_phy *phy; > struct ata_port *ap = link->ap; > int (*check_ready)(struct ata_link *link); > struct domain_device *dev = ap->private_data; > - struct sas_phy *phy = sas_find_local_phy(dev); > struct sas_internal *i = dev_to_sas_internal(dev); > > res = i->dft->lldd_I_T_nexus_reset(dev); > @@ -325,10 +326,12 @@ static int sas_ata_hard_reset(struct ata_link *link, > unsigned int *class, > if (res != TMF_RESP_FUNC_COMPLETE) > SAS_DPRINTK("%s: Unable to reset ata device?\n", __func__); > > + phy = sas_get_local_phy(dev); > if (scsi_is_sas_phy_local(phy)) > check_ready = local_ata_check_ready; > else > check_ready = smp_ata_check_ready; > + sas_put_local_phy(phy); > > ret = ata_wait_after_reset(link, deadline, check_ready); > if (ret && ret != -EAGAIN) > diff --git a/drivers/scsi/libsas/sas_discover.c > b/drivers/scsi/libsas/sas_discover.c > index 6e5fdfd..c765218 100644 > --- a/drivers/scsi/libsas/sas_discover.c > +++ b/drivers/scsi/libsas/sas_discover.c > @@ -147,6 +147,7 @@ static int sas_get_port_device(struct asd_sas_port *port) > memset(port->disc.eeds_a, 0, SAS_ADDR_SIZE); > memset(port->disc.eeds_b, 0, SAS_ADDR_SIZE); > port->disc.max_level = 0; > + sas_device_set_phy(dev, port->port); > > dev->rphy = rphy; > > @@ -234,6 +235,9 @@ void sas_free_device(struct kref *kref) > if (dev->parent) > sas_put_device(dev->parent); > > + sas_port_put_phy(dev->phy); > + dev->phy = NULL; > + > /* remove the phys and ports, everything else should be gone */ > if (dev->dev_type == EDGE_DEV || dev->dev_type == FANOUT_DEV) > kfree(dev->ex_dev.ex_phy); > @@ -307,6 +311,26 @@ void sas_unregister_domain_devices(struct asd_sas_port > *port) > > } > > +void sas_device_set_phy(struct domain_device *dev, struct sas_port *port) > +{ > + struct sas_ha_struct *ha; > + struct sas_phy *new_phy; > + > + if (!dev) > + return; > + > + ha = dev->port->ha; > + new_phy = sas_port_get_phy(port); > + > + /* pin and record last seen phy */ > + spin_lock_irq(&ha->phy_port_lock); > + if (new_phy) { > + sas_port_put_phy(dev->phy); > + dev->phy = new_phy; > + } > + spin_unlock_irq(&ha->phy_port_lock); > +} > + > /* ---------- Discovery and Revalidation ---------- */ > > /** > diff --git a/drivers/scsi/libsas/sas_expander.c > b/drivers/scsi/libsas/sas_expander.c > index e2efc6c..e47599b 100644 > --- a/drivers/scsi/libsas/sas_expander.c > +++ b/drivers/scsi/libsas/sas_expander.c > @@ -721,6 +721,7 @@ static struct domain_device *sas_ex_discover_end_dev( > } > } > sas_ex_get_linkrate(parent, child, phy); > + sas_device_set_phy(child, phy->port); > > #ifdef CONFIG_SCSI_SAS_ATA > if ((phy->attached_tproto & SAS_PROTOCOL_STP) || phy->attached_sata_dev) > { > @@ -1808,7 +1809,7 @@ static void sas_unregister_devs_sas_addr(struct > domain_device *parent, > { > struct expander_device *ex_dev = &parent->ex_dev; > struct ex_phy *phy = &ex_dev->ex_phy[phy_id]; > - struct domain_device *child, *n; > + struct domain_device *child, *n, *found = NULL; > if (last) { > list_for_each_entry_safe(child, n, > &ex_dev->children, siblings) { > @@ -1820,6 +1821,7 @@ static void sas_unregister_devs_sas_addr(struct > domain_device *parent, > sas_unregister_ex_tree(parent->port, child); > else > sas_unregister_dev(parent->port, child); > + found = child; > break; > } > } > @@ -1828,6 +1830,7 @@ static void sas_unregister_devs_sas_addr(struct > domain_device *parent, > memset(phy->attached_sas_addr, 0, SAS_ADDR_SIZE); > if (phy->port) { > sas_port_delete_phy(phy->port, phy->phy); > + sas_device_set_phy(found, phy->port); > if (phy->port->num_phys == 0) > sas_port_delete(phy->port); > phy->port = NULL; > diff --git a/drivers/scsi/libsas/sas_internal.h > b/drivers/scsi/libsas/sas_internal.h > index c6317c1..eec945e 100644 > --- a/drivers/scsi/libsas/sas_internal.h > +++ b/drivers/scsi/libsas/sas_internal.h > @@ -85,6 +85,7 @@ int sas_smp_phy_control(struct domain_device *dev, int > phy_id, > enum phy_func phy_func, struct sas_phy_linkrates *); > int sas_smp_get_phy_events(struct sas_phy *phy); > > +void sas_device_set_phy(struct domain_device *dev, struct sas_port *port); > struct domain_device *sas_find_dev_by_rphy(struct sas_rphy *rphy); > struct domain_device *sas_ex_to_ata(struct domain_device *ex_dev, int > phy_id); > int sas_get_phy_attached_sas_addr(struct domain_device *dev, int phy_id, > diff --git a/drivers/scsi/libsas/sas_port.c > b/drivers/scsi/libsas/sas_port.c > index e8e68d0..36e2905 100644 > --- a/drivers/scsi/libsas/sas_port.c > +++ b/drivers/scsi/libsas/sas_port.c > @@ -108,9 +108,6 @@ static void sas_form_port(struct asd_sas_phy *phy) > port->num_phys++; > port->phy_mask |= (1U << phy->id); > > - if (!port->phy) > - port->phy = phy->phy; > - > if (*(u64 *)port->attached_sas_addr == 0) { > port->class = phy->class; > memcpy(port->attached_sas_addr, phy->attached_sas_addr, > @@ -175,8 +172,10 @@ void sas_deform_port(struct asd_sas_phy *phy, int gone) > sas_unregister_domain_devices(port); > sas_port_delete(port->port); > port->port = NULL; > - } else > + } else { > sas_port_delete_phy(port->port, phy->phy); > + sas_device_set_phy(dev, port->port); > + } > > if (si->dft->lldd_port_deformed) > si->dft->lldd_port_deformed(phy); > diff --git a/drivers/scsi/libsas/sas_scsi_host.c > b/drivers/scsi/libsas/sas_scsi_host.c > index b849dcd..59a227d 100644 > --- a/drivers/scsi/libsas/sas_scsi_host.c > +++ b/drivers/scsi/libsas/sas_scsi_host.c > @@ -439,30 +439,26 @@ static int sas_recover_I_T(struct domain_device *dev) > return res; > } > > -/* Find the sas_phy that's attached to this device */ > -struct sas_phy *sas_find_local_phy(struct domain_device *dev) > +/* take a reference on the last known good phy for this device */ > +struct sas_phy *sas_get_local_phy(struct domain_device *dev) > { > - struct domain_device *pdev = dev->parent; > - struct ex_phy *exphy = NULL; > - int i; > + struct sas_ha_struct *ha = dev->port->ha; > + struct sas_phy *phy; > + unsigned long flags; > > - /* Directly attached device */ > - if (!pdev) > - return dev->port->phy; > + /* a published domain device always has a valid phy, it may be > + * stale, but it is never NULL > + */ > + BUG_ON(!dev->phy); > > - /* Otherwise look in the expander */ > - for (i = 0; i < pdev->ex_dev.num_phys; i++) > - if (!memcmp(dev->sas_addr, > - pdev->ex_dev.ex_phy[i].attached_sas_addr, > - SAS_ADDR_SIZE)) { > - exphy = &pdev->ex_dev.ex_phy[i]; > - break; > - } > + spin_lock_irqsave(&ha->phy_port_lock, flags); > + phy = dev->phy; > + get_device(&phy->dev); > + spin_unlock_irqrestore(&ha->phy_port_lock, flags); > > - BUG_ON(!exphy); > - return exphy->phy; > + return phy; > } > -EXPORT_SYMBOL_GPL(sas_find_local_phy); > +EXPORT_SYMBOL_GPL(sas_get_local_phy); > > /* Attempt to send a LUN reset message to a device */ > int sas_eh_device_reset_handler(struct scsi_cmnd *cmd) > @@ -489,7 +485,7 @@ int sas_eh_device_reset_handler(struct scsi_cmnd *cmd) > int sas_eh_bus_reset_handler(struct scsi_cmnd *cmd) > { > struct domain_device *dev = cmd_to_domain_dev(cmd); > - struct sas_phy *phy = sas_find_local_phy(dev); > + struct sas_phy *phy = sas_get_local_phy(dev); > int res; > > res = sas_phy_reset(phy, 1); > @@ -497,6 +493,8 @@ int sas_eh_bus_reset_handler(struct scsi_cmnd *cmd) > SAS_DPRINTK("Bus reset of %s failed 0x%x\n", > kobject_name(&phy->dev.kobj), > res); > + sas_put_local_phy(phy); > + > if (res == TMF_RESP_FUNC_SUCC || res == TMF_RESP_FUNC_COMPLETE) > return SUCCESS; > > diff --git a/drivers/scsi/mvsas/mv_sas.c b/drivers/scsi/mvsas/mv_sas.c > index cd88223..b68a653 100644 > --- a/drivers/scsi/mvsas/mv_sas.c > +++ b/drivers/scsi/mvsas/mv_sas.c > @@ -1474,10 +1474,11 @@ static int mvs_debug_issue_ssp_tmf(struct > domain_device *dev, > static int mvs_debug_I_T_nexus_reset(struct domain_device *dev) > { > int rc; > - struct sas_phy *phy = sas_find_local_phy(dev); > + struct sas_phy *phy = sas_get_local_phy(dev); > int reset_type = (dev->dev_type == SATA_DEV || > (dev->tproto & SAS_PROTOCOL_STP)) ? 0 : 1; > rc = sas_phy_reset(phy, reset_type); > + sas_put_local_phy(phy); > msleep(2000); > return rc; > } > diff --git a/drivers/scsi/pm8001/pm8001_sas.c > b/drivers/scsi/pm8001/pm8001_sas.c > index 5add18c..b111018 100644 > --- a/drivers/scsi/pm8001/pm8001_sas.c > +++ b/drivers/scsi/pm8001/pm8001_sas.c > @@ -873,12 +873,14 @@ int pm8001_I_T_nexus_reset(struct domain_device *dev) > > pm8001_dev = dev->lldd_dev; > pm8001_ha = pm8001_find_ha_by_dev(dev); > - phy = sas_find_local_phy(dev); > + phy = sas_get_local_phy(dev); > > if (dev_is_sata(dev)) { > DECLARE_COMPLETION_ONSTACK(completion_setstate); > - if (scsi_is_sas_phy_local(phy)) > - return 0; > + if (scsi_is_sas_phy_local(phy)) { > + rc = 0; > + goto out; > + } > rc = sas_phy_reset(phy, 1); > msleep(2000); > rc = pm8001_exec_internal_task_abort(pm8001_ha, pm8001_dev , > @@ -887,12 +889,14 @@ int pm8001_I_T_nexus_reset(struct domain_device *dev) > rc = PM8001_CHIP_DISP->set_dev_state_req(pm8001_ha, > pm8001_dev, 0x01); > wait_for_completion(&completion_setstate); > - } else{ > - rc = sas_phy_reset(phy, 1); > - msleep(2000); > + } else { > + rc = sas_phy_reset(phy, 1); > + msleep(2000); > } > PM8001_EH_DBG(pm8001_ha, pm8001_printk(" for device[%x]:rc=%d\n", > pm8001_dev->device_id, rc)); > + out: > + sas_put_local_phy(phy); > return rc; > } > > @@ -904,10 +908,11 @@ int pm8001_lu_reset(struct domain_device *dev, u8 *lun) > struct pm8001_device *pm8001_dev = dev->lldd_dev; > struct pm8001_hba_info *pm8001_ha = pm8001_find_ha_by_dev(dev); > if (dev_is_sata(dev)) { > - struct sas_phy *phy = sas_find_local_phy(dev); > + struct sas_phy *phy = sas_get_local_phy(dev); > rc = pm8001_exec_internal_task_abort(pm8001_ha, pm8001_dev , > dev, 1, 0); > rc = sas_phy_reset(phy, 1); > + sas_put_local_phy(phy); > rc = PM8001_CHIP_DISP->set_dev_state_req(pm8001_ha, > pm8001_dev, 0x01); > msleep(2000); > diff --git a/drivers/scsi/scsi_transport_sas.c > b/drivers/scsi/scsi_transport_sas.c > index ab3bd0b..7d69a25 100644 > --- a/drivers/scsi/scsi_transport_sas.c > +++ b/drivers/scsi/scsi_transport_sas.c > @@ -1060,6 +1060,29 @@ int scsi_is_sas_port(const struct device *dev) > EXPORT_SYMBOL(scsi_is_sas_port); > > /** > + * sas_port_get_phy - try to take a reference on a port member > + * @port: port to check > + */ > +struct sas_phy *sas_port_get_phy(struct sas_port *port) > +{ > + struct sas_phy *phy; > + > + mutex_lock(&port->phy_list_mutex); > + if (list_empty(&port->phy_list)) > + phy = NULL; > + else { > + struct list_head *ent = port->phy_list.next; > + > + phy = list_entry(ent, typeof(*phy), port_siblings); > + get_device(&phy->dev); > + } > + mutex_unlock(&port->phy_list_mutex); > + > + return phy; > +} > +EXPORT_SYMBOL(sas_port_get_phy); > + > +/** > * sas_port_add_phy - add another phy to a port to form a wide port > * @port: port to add the phy to > * @phy: phy to add > diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h > index 3c9849c..571e7fc 100644 > --- a/include/scsi/libsas.h > +++ b/include/scsi/libsas.h > @@ -188,6 +188,7 @@ struct domain_device { > struct domain_device *parent; > struct list_head siblings; /* devices on the same level */ > struct asd_sas_port *port; /* shortcut to root of the tree */ > + struct sas_phy *phy; > > struct list_head dev_list_node; > struct list_head disco_list_node; > @@ -239,7 +240,6 @@ struct asd_sas_port { > struct list_head destroy_list; > enum sas_linkrate linkrate; > > - struct sas_phy *phy; > struct work_struct work; > > /* public: */ > @@ -424,6 +424,11 @@ static inline unsigned int to_sas_gpio_od(int device, int > bit) > return 3 * device + bit; > } > > +static inline void sas_put_local_phy(struct sas_phy *phy) > +{ > + put_device(&phy->dev); > +} > + > #ifdef CONFIG_SCSI_SAS_HOST_SMP > int try_test_sas_gpio_gp_bit(unsigned int od, u8 *data, u8 index, u8 count); > #else > @@ -679,7 +684,7 @@ extern int sas_smp_handler(struct Scsi_Host *shost, struct > sas_rphy *rphy, > > extern void sas_ssp_task_response(struct device *dev, struct sas_task *task, > struct ssp_response_iu *iu); > -struct sas_phy *sas_find_local_phy(struct domain_device *dev); > +struct sas_phy *sas_get_local_phy(struct domain_device *dev); > > int sas_request_addr(struct Scsi_Host *shost, u8 *addr); > > diff --git a/include/scsi/scsi_transport_sas.h > b/include/scsi/scsi_transport_sas.h > index 42817fa..98b3a20 100644 > --- a/include/scsi/scsi_transport_sas.h > +++ b/include/scsi/scsi_transport_sas.h > @@ -209,6 +209,12 @@ void sas_port_add_phy(struct sas_port *, struct sas_phy > *); > void sas_port_delete_phy(struct sas_port *, struct sas_phy *); > void sas_port_mark_backlink(struct sas_port *); > int scsi_is_sas_port(const struct device *); > +struct sas_phy *sas_port_get_phy(struct sas_port *port); > +static inline void sas_port_put_phy(struct sas_phy *phy) > +{ > + if (phy) > + put_device(&phy->dev); > +} > > extern struct scsi_transport_template * > sas_attach_transport(struct sas_function_template *); > > -- > To unsubscribe from this list: send the line "unsubscribe linux-scsi" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-ide" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html