On Mon, Apr 13, 2020 at 11:40 AM Deepak Ukey <deepak.ukey@xxxxxxxxxxxxx> wrote: > > From: peter chang <dpf@xxxxxxxxxx> > > Until udev rolls out we can't proceed past module loading w/ device > discovery in progress because things like the init scripts only work > over the currently discovered block devices. Just curious, what's this udev rollout? The drivers for disk > controllers have various forms of 'barriers' to prevent this from > happening depending on their underlying support libraries. > The host's scan finish waits for the libsas queue to drain. However, > if the PHYs are still in the process of starting then the queue will > be empty. This means that we declare the scan finished before it has > even started. Here we wait for various events from the firmware-side, > and even though we disable staggered spinup we still pretend like > it's there. > > Signed-off-by: Deepak Ukey <deepak.ukey@xxxxxxxxxxxxx> > Signed-off-by: Viswas G <Viswas.G@xxxxxxxxxxxxx> > Signed-off-by: Radha Ramachandran <radha@xxxxxxxxxx> > Signed-off-by: peter chang <dpf@xxxxxxxxxx> > --- > drivers/scsi/pm8001/pm8001_ctl.c | 36 +++++++++++++++++++++ > drivers/scsi/pm8001/pm8001_defs.h | 6 ++-- > drivers/scsi/pm8001/pm8001_init.c | 25 +++++++++++++++ > drivers/scsi/pm8001/pm8001_sas.c | 61 +++++++++++++++++++++++++++++++++-- > drivers/scsi/pm8001/pm8001_sas.h | 3 ++ > drivers/scsi/pm8001/pm80xx_hwi.c | 67 ++++++++++++++++++++++++++++++++------- > 6 files changed, 181 insertions(+), 17 deletions(-) > > diff --git a/drivers/scsi/pm8001/pm8001_ctl.c b/drivers/scsi/pm8001/pm8001_ctl.c > index 3c9f42779dd0..eae629610a5f 100644 > --- a/drivers/scsi/pm8001/pm8001_ctl.c > +++ b/drivers/scsi/pm8001/pm8001_ctl.c > @@ -88,6 +88,41 @@ static ssize_t controller_fatal_error_show(struct device *cdev, > } > static DEVICE_ATTR_RO(controller_fatal_error); > > +/** > + * phy_startup_timeout_show - per-phy discovery timeout > + * @cdev: pointer to embedded class device > + * @buf: the buffer returned > + * > + * A sysfs 'read/write' shost attribute. > + */ > +static ssize_t phy_startup_timeout_show(struct device *cdev, > + struct device_attribute *attr, char *buf) > +{ > + struct Scsi_Host *shost = class_to_shost(cdev); > + struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost); > + struct pm8001_hba_info *pm8001_ha = sha->lldd_ha; > + > + return snprintf(buf, PAGE_SIZE, "%08xh\n", > + pm8001_ha->phy_startup_timeout / HZ); > +} > + > +static ssize_t phy_startup_timeout_store(struct device *cdev, > + struct device_attribute *attr, const char *buf, size_t count) > +{ > + struct Scsi_Host *shost = class_to_shost(cdev); > + struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost); > + struct pm8001_hba_info *pm8001_ha = sha->lldd_ha; > + int val = 0; > + > + if (kstrtoint(buf, 0, &val) < 0) > + return -EINVAL; > + > + pm8001_ha->phy_startup_timeout = val * HZ; > + return strlen(buf); > +} > + > +static DEVICE_ATTR_RW(phy_startup_timeout); > + > /** > * pm8001_ctl_fw_version_show - firmware version > * @cdev: pointer to embedded class device > @@ -867,6 +902,7 @@ static DEVICE_ATTR(update_fw, S_IRUGO|S_IWUSR|S_IWGRP, > struct device_attribute *pm8001_host_attrs[] = { > &dev_attr_interface_rev, > &dev_attr_controller_fatal_error, > + &dev_attr_phy_startup_timeout, > &dev_attr_fw_version, > &dev_attr_update_fw, > &dev_attr_aap_log, > diff --git a/drivers/scsi/pm8001/pm8001_defs.h b/drivers/scsi/pm8001/pm8001_defs.h > index fd700ce5e80c..aaeabb2f2808 100644 > --- a/drivers/scsi/pm8001/pm8001_defs.h > +++ b/drivers/scsi/pm8001/pm8001_defs.h > @@ -141,7 +141,9 @@ enum pm8001_hba_info_flags { > */ > #define PHY_LINK_DISABLE 0x00 > #define PHY_LINK_DOWN 0x01 > -#define PHY_STATE_LINK_UP_SPCV 0x2 > -#define PHY_STATE_LINK_UP_SPC 0x1 > +#define PHY_STATE_LINK_UP_SPCV 0x02 > +#define PHY_STATE_LINK_UP_SPC 0x01 > +#define PHY_STATE_LINK_RESET 0x03 > +#define PHY_STATE_HARD_RESET 0x04 > > #endif > diff --git a/drivers/scsi/pm8001/pm8001_init.c b/drivers/scsi/pm8001/pm8001_init.c > index 6cbb8fa74456..560dd9c3f745 100644 > --- a/drivers/scsi/pm8001/pm8001_init.c > +++ b/drivers/scsi/pm8001/pm8001_init.c > @@ -61,6 +61,30 @@ MODULE_PARM_DESC(staggered_spinup, "enable the staggered spinup feature.\n" > " 0/N: false\n" > " 1/Y: true\n"); > > +/* if nothing is detected, the PHYs will reset continuously once they > + * are started. we don't have a good way of differentiating a trained > + * but waiting-on-signature from one that's never going to train > + * (nothing attached or dead drive), so we wait an possibly > + * unreasonable amount of time. this is stuck in start_timeout, and > + * checked in the host's scan_finished callback for PHYs that haven't > + * yet come up. > + * > + * the timeout here was experimentally determined by looking at our > + * current worst-case spin-up drive (seagate 8T) which has > + * the most drive-to-drive variance, some issues coming up from the > + * sleep state (randomly applied ~10s delay to non-data operations), > + * and errors from IDENTIFY. > + * > + * NB: this a workaround to handle current lack of udev. once > + * that's everywhere and dynamically dealing w/ device add/remove > + * (step one doesn't deal w/ this later condition) then the patches > + * can be removed. If it's just a workaround for missing proper udev rule, I think we shouldnt' include it in upstream. > + */ > +static ulong phy_startup_timeout = 60; > +module_param(phy_startup_timeout, ulong, 0644); > +MODULE_PARM_DESC(phy_startup_timeout_s, > + " seconds to wait for discovery, per-PHY."); > + > static struct scsi_transport_template *pm8001_stt; > > /** > @@ -493,6 +517,7 @@ static struct pm8001_hba_info *pm8001_pci_alloc(struct pci_dev *pdev, > pm8001_ha->id = pm8001_id++; > pm8001_ha->logging_level = logging_level; > pm8001_ha->staggered_spinup = staggered_spinup; > + pm8001_ha->phy_startup_timeout = phy_startup_timeout * HZ; > pm8001_ha->non_fatal_count = 0; > if (link_rate >= 1 && link_rate <= 15) > pm8001_ha->link_rate = (link_rate << 8); > diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c > index 806845203602..470fe4dd3b52 100644 > --- a/drivers/scsi/pm8001/pm8001_sas.c > +++ b/drivers/scsi/pm8001/pm8001_sas.c > @@ -39,6 +39,7 @@ > */ > > #include <linux/slab.h> > +#include "pm80xx_hwi.h" > #include "pm8001_sas.h" > > /** > @@ -257,6 +258,54 @@ int pm8001_phy_control(struct asd_sas_phy *sas_phy, enum phy_func func, > return rc; > } > > +static int pm8001_update_phy_mask(struct pm8001_hba_info *pm8001_ha) > +{ > + struct phy_profile *profile = &pm8001_ha->phy_profile_resp.phy.status; > + DECLARE_COMPLETION_ONSTACK(comp); > + unsigned long timeout = msecs_to_jiffies(2000); > + int ret = 0; > + int i; > + > + for (i = 0; i < pm8001_ha->chip->n_phy; i++) { > + struct pm8001_phy *phy = pm8001_ha->phy + i; > + > + if (pm8001_ha->phy_mask & (1 << i) && phy->phy_state) { > + pm8001_ha->phyprofile_completion = ∁ Ok, you have phyprofile_completion init here, I think it should be moved to first patch. Thanks!