-------------------------------diff file----------------------------------------- diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index bb5a8d9f03ad..c9f3ed86a084 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -700,6 +700,7 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id) irqreturn_t ret; u32 events; + printk("==== %s %d start running\n", __func__, __LINE__); ctrl->ist_running = true; pci_config_pm_runtime_get(pdev); @@ -755,6 +756,7 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id) pci_config_pm_runtime_put(pdev); ctrl->ist_running = false; wake_up(&ctrl->requester); + printk("==== %s %d stop running\n", __func__, __LINE__); return ret; } diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 661f98c6c63a..ffa58f389456 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -4784,6 +4784,7 @@ static bool pcie_wait_for_link_delay(struct pci_dev *pdev, bool active, if (active) msleep(20); rc = pcie_wait_for_link_status(pdev, false, active); + printk("======%s %d,wait for linksta:%d\n", __func__, __LINE__, rc); if (active) { if (rc) rc = pcie_failed_link_retrain(pdev); diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 2e40fc63ba31..b7e5af859517 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -337,12 +337,13 @@ void pci_bus_put(struct pci_bus *bus); #define PCIE_LNKCAP_SLS2SPEED(lnkcap) \ ({ \ - ((lnkcap) == PCI_EXP_LNKCAP_SLS_64_0GB ? PCIE_SPEED_64_0GT : \ - (lnkcap) == PCI_EXP_LNKCAP_SLS_32_0GB ? PCIE_SPEED_32_0GT : \ - (lnkcap) == PCI_EXP_LNKCAP_SLS_16_0GB ? PCIE_SPEED_16_0GT : \ - (lnkcap) == PCI_EXP_LNKCAP_SLS_8_0GB ? PCIE_SPEED_8_0GT : \ - (lnkcap) == PCI_EXP_LNKCAP_SLS_5_0GB ? PCIE_SPEED_5_0GT : \ - (lnkcap) == PCI_EXP_LNKCAP_SLS_2_5GB ? PCIE_SPEED_2_5GT : \ + u32 __lnkcap = (lnkcap) & PCI_EXP_LNKCAP_SLS; \ + (__lnkcap == PCI_EXP_LNKCAP_SLS_64_0GB ? PCIE_SPEED_64_0GT : \ + __lnkcap == PCI_EXP_LNKCAP_SLS_32_0GB ? 
PCIE_SPEED_32_0GT : \ + __lnkcap == PCI_EXP_LNKCAP_SLS_16_0GB ? PCIE_SPEED_16_0GT : \ + __lnkcap == PCI_EXP_LNKCAP_SLS_8_0GB ? PCIE_SPEED_8_0GT : \ + __lnkcap == PCI_EXP_LNKCAP_SLS_5_0GB ? PCIE_SPEED_5_0GT : \ + __lnkcap == PCI_EXP_LNKCAP_SLS_2_5GB ? PCIE_SPEED_2_5GT : \ PCI_SPEED_UNKNOWN); \ }) @@ -357,13 +358,16 @@ void pci_bus_put(struct pci_bus *bus); PCI_SPEED_UNKNOWN) #define PCIE_LNKCTL2_TLS2SPEED(lnkctl2) \ - ((lnkctl2) == PCI_EXP_LNKCTL2_TLS_64_0GT ? PCIE_SPEED_64_0GT : \ - (lnkctl2) == PCI_EXP_LNKCTL2_TLS_32_0GT ? PCIE_SPEED_32_0GT : \ - (lnkctl2) == PCI_EXP_LNKCTL2_TLS_16_0GT ? PCIE_SPEED_16_0GT : \ - (lnkctl2) == PCI_EXP_LNKCTL2_TLS_8_0GT ? PCIE_SPEED_8_0GT : \ - (lnkctl2) == PCI_EXP_LNKCTL2_TLS_5_0GT ? PCIE_SPEED_5_0GT : \ - (lnkctl2) == PCI_EXP_LNKCTL2_TLS_2_5GT ? PCIE_SPEED_2_5GT : \ - PCI_SPEED_UNKNOWN) +({ \ + u16 __lnkctl2 = (lnkctl2) & PCI_EXP_LNKCTL2_TLS; \ + (__lnkctl2 == PCI_EXP_LNKCTL2_TLS_64_0GT ? PCIE_SPEED_64_0GT : \ + __lnkctl2 == PCI_EXP_LNKCTL2_TLS_32_0GT ? PCIE_SPEED_32_0GT : \ + __lnkctl2 == PCI_EXP_LNKCTL2_TLS_16_0GT ? PCIE_SPEED_16_0GT : \ + __lnkctl2 == PCI_EXP_LNKCTL2_TLS_8_0GT ? PCIE_SPEED_8_0GT : \ + __lnkctl2 == PCI_EXP_LNKCTL2_TLS_5_0GT ? PCIE_SPEED_5_0GT : \ + __lnkctl2 == PCI_EXP_LNKCTL2_TLS_2_5GT ? 
PCIE_SPEED_2_5GT : \ + PCI_SPEED_UNKNOWN); \ +}) /* PCIe speed to Mb/s reduced by encoding overhead */ #define PCIE_SPEED2MBS_ENC(speed) \ diff --git a/drivers/pci/pcie/bwctrl.c b/drivers/pci/pcie/bwctrl.c index b59cacc740fa..a8ce09f67d3b 100644 --- a/drivers/pci/pcie/bwctrl.c +++ b/drivers/pci/pcie/bwctrl.c @@ -168,8 +168,10 @@ int pcie_set_target_speed(struct pci_dev *port, enum pci_bus_speed speed_req, if (WARN_ON_ONCE(!pcie_valid_speed(speed_req))) return -EINVAL; - if (bus && bus->cur_bus_speed == speed_req) + if (bus && bus->cur_bus_speed == speed_req) { + printk("========== %s %d, speed has been set\n", __func__, __LINE__); return 0; + } target_speed = pcie_bwctrl_select_speed(port, speed_req); @@ -184,6 +186,7 @@ int pcie_set_target_speed(struct pci_dev *port, enum pci_bus_speed speed_req, mutex_lock(&data->set_speed_mutex); ret = pcie_bwctrl_change_speed(port, target_speed, use_lt); + printk("========== %s %d, bwctl change speed ret:0x%x\n", __func__, __LINE__,ret); if (data) mutex_unlock(&data->set_speed_mutex); @@ -209,8 +212,10 @@ static void pcie_bwnotif_enable(struct pcie_device *srv) /* Count LBMS seen so far as one */ ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status); - if (ret == PCIBIOS_SUCCESSFUL && link_status & PCI_EXP_LNKSTA_LBMS) + if (ret == PCIBIOS_SUCCESSFUL && link_status & PCI_EXP_LNKSTA_LBMS) { + printk("==== %s %d lbms_count++\n", __func__, __LINE__); atomic_inc(&data->lbms_count); + } pcie_capability_set_word(port, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE); @@ -239,6 +244,7 @@ static irqreturn_t pcie_bwnotif_irq(int irq, void *context) int ret; ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status); + printk("==== %s %d(start running),link_status:0x%x\n", __func__, __LINE__,link_status); if (ret != PCIBIOS_SUCCESSFUL) return IRQ_NONE; @@ -246,8 +252,10 @@ static irqreturn_t pcie_bwnotif_irq(int irq, void *context) if (!events) return IRQ_NONE; - if (events & PCI_EXP_LNKSTA_LBMS) + if 
(events & PCI_EXP_LNKSTA_LBMS) { + printk("==== %s %d lbms_count++\n", __func__, __LINE__); atomic_inc(&data->lbms_count); + } pcie_capability_write_word(port, PCI_EXP_LNKSTA, events); @@ -258,6 +266,7 @@ static irqreturn_t pcie_bwnotif_irq(int irq, void *context) * cleared to avoid missing link speed changes. */ pcie_update_link_speed(port->subordinate); + printk("==== %s %d(stop running),link_status:0x%x\n", __func__, __LINE__,link_status); return IRQ_HANDLED; } @@ -268,8 +277,10 @@ void pcie_reset_lbms_count(struct pci_dev *port) guard(rwsem_read)(&pcie_bwctrl_lbms_rwsem); data = port->link_bwctrl; - if (data) + if (data) { + printk("==== %s %d lbms_count set to 0\n", __func__, __LINE__); atomic_set(&data->lbms_count, 0); + } else pcie_capability_write_word(port, PCI_EXP_LNKSTA, PCI_EXP_LNKSTA_LBMS); diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 76f4df75b08a..a602f9aa5d6a 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -41,8 +41,11 @@ static bool pcie_lbms_seen(struct pci_dev *dev, u16 lnksta) int ret; ret = pcie_lbms_count(dev, &count); - if (ret < 0) + if (ret < 0) { + printk("==== %s %d lnksta(0x%x) & LBMS\n", __func__, __LINE__, lnksta); return lnksta & PCI_EXP_LNKSTA_LBMS; + } + printk("==== %s %d count:0x%lx\n", __func__, __LINE__, count); return count > 0; } @@ -110,6 +113,8 @@ int pcie_failed_link_retrain(struct pci_dev *dev) pcie_capability_read_word(dev, PCI_EXP_LNKCTL2, &lnkctl2); pcie_capability_read_word(dev, PCI_EXP_LNKSTA, &lnksta); + pci_info(dev, "============ %s %d, lnkctl2:0x%x, lnksta:0x%x\n", + __func__, __LINE__, lnkctl2, lnksta); if (!(lnksta & PCI_EXP_LNKSTA_DLLLA) && pcie_lbms_seen(dev, lnksta)) { u16 oldlnkctl2 = lnkctl2; @@ -121,9 +126,14 @@ int pcie_failed_link_retrain(struct pci_dev *dev) pcie_set_target_speed(dev, PCIE_LNKCTL2_TLS2SPEED(oldlnkctl2), true); return ret; + } else { + pci_info(dev, "retraining sucessfully, but now is in Gen 1\n"); } + pcie_capability_read_word(dev, PCI_EXP_LNKCTL2, 
&lnkctl2); pcie_capability_read_word(dev, PCI_EXP_LNKSTA, &lnksta); + pci_info(dev, "============ %s %d, oldlnkctl2:0x%x,newlnkctl2:0x%x,newlnksta:0x%x\n", + __func__, __LINE__, oldlnkctl2, lnkctl2, lnksta); } if ((lnksta & PCI_EXP_LNKSTA_DLLLA) && -------------------------------diff file----------------------------------------- Based on the information in the log from 566.755596 to 567.801220, the issue has been reproduced. Between 566 and 567 seconds, the pcie_bwnotif_irq interrupt was triggered 4 times, which indicates that the NVMe drive was plugged and unplugged multiple times during this period. Thanks, Regards, Jiwei > didn't explain LBMS (nor DLLLA) in the above sequence so it's hard to > follow what is going on here. LBMS in particular is of high interest here > because I'm trying to understand if something should clear it on the > hotplug side (there's already one call to clear it in remove_board()). > > In step 2 (pcie_set_target_speed() in step 1 succeeded), > pcie_failed_link_retrain() attempts to restore >2.5GT/s speed, this only > occurs when pci_match_id() matches. I guess you're trying to say that step > 2 is not taken because pci_match_id() is not matching but the wording > above is very confusing. > > Overall, I failed to understand the scenario here fully despite trying to > think it through over these few days. > >> the target link speed >> field of the Link Control >> 2 Register will keep 0x1. >> >> In order to fix the issue, don't do the retraining work except ASMedia >> ASM2824. 
>> >> Fixes: a89c82249c37 ("PCI: Work around PCIe link training failures") >> Reported-by: Adrian Huang <ahuang12@xxxxxxxxxx> >> Signed-off-by: Jiwei Sun <sunjw10@xxxxxxxxxx> >> --- >> drivers/pci/quirks.c | 6 ++++-- >> 1 file changed, 4 insertions(+), 2 deletions(-) >> >> diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c >> index 605628c810a5..ff04ebd9ae16 100644 >> --- a/drivers/pci/quirks.c >> +++ b/drivers/pci/quirks.c >> @@ -104,6 +104,9 @@ int pcie_failed_link_retrain(struct pci_dev *dev) >> u16 lnksta, lnkctl2; >> int ret = -ENOTTY; >> >> + if (!pci_match_id(ids, dev)) >> + return 0; >> + >> if (!pci_is_pcie(dev) || !pcie_downstream_port(dev) || >> !pcie_cap_has_lnkctl2(dev) || !dev->link_active_reporting) >> return ret; >> @@ -129,8 +132,7 @@ int pcie_failed_link_retrain(struct pci_dev *dev) >> } >> >> if ((lnksta & PCI_EXP_LNKSTA_DLLLA) && >> - (lnkctl2 & PCI_EXP_LNKCTL2_TLS) == PCI_EXP_LNKCTL2_TLS_2_5GT && >> - pci_match_id(ids, dev)) { >> + (lnkctl2 & PCI_EXP_LNKCTL2_TLS) == PCI_EXP_LNKCTL2_TLS_2_5GT) { >> u32 lnkcap; >> >> pci_info(dev, "removing 2.5GT/s downstream link speed restriction\n"); >> >