Found that one PCI Express module has a link training error after hotplug. It turns out that after DLLLA is set, LT is still set for a while, so pciehp will declare that the hotplug failed within 1s. The HW guys say that pciehp violates the PCIe spec. From PCI Express Base Specification Revision 2.1, Section 6.7.3.3: Software must allow 1 second after the Data Link Layer Link Active bit reads 1b before it is permitted to determine that a hot plugged device which fails to return a Successful Completion for a Valid Configuration Request is a broken device (see section 6.6). Try to wait long enough by adding an LT check with a 1s timeout. Signed-off-by: Yinghai Lu <yinghai.lu@xxxxxxxxxx> --- drivers/pci/hotplug/pciehp_hpc.c | 52 +++++++++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 15 deletions(-) Index: linux-2.6/drivers/pci/hotplug/pciehp_hpc.c =================================================================== --- linux-2.6.orig/drivers/pci/hotplug/pciehp_hpc.c +++ linux-2.6/drivers/pci/hotplug/pciehp_hpc.c @@ -241,28 +241,49 @@ static int pcie_write_cmd(struct control return retval; } -static inline int check_link_active(struct controller *ctrl) +static inline bool +check_link_status_bits(struct controller *ctrl, u16 mask, u16 val) { u16 link_status; if (pciehp_readw(ctrl, PCI_EXP_LNKSTA, &link_status)) - return 0; - return !!(link_status & PCI_EXP_LNKSTA_DLLLA); + return false; + + if ((link_status & mask) == val) + return true; + + return false; } -static void pcie_wait_link_active(struct controller *ctrl) +static bool +pcie_wait_link_status_bits(struct controller *ctrl, u16 mask, u16 val) { int timeout = 1000; - if (check_link_active(ctrl)) - return; + if (check_link_status_bits(ctrl, mask, val)) + return true; while (timeout > 0) { - msleep(10); - timeout -= 10; - if (check_link_active(ctrl)) - return; + msleep(20); + timeout -= 20; + if (check_link_status_bits(ctrl, mask, val)) + return true; } - ctrl_dbg(ctrl, "Data Link Layer Link Active not set in 1000 msec\n"); + 
+ return false; +} + +static void pcie_wait_link_active(struct controller *ctrl) +{ + if (!pcie_wait_link_status_bits(ctrl, PCI_EXP_LNKSTA_DLLLA, + PCI_EXP_LNKSTA_DLLLA)) + ctrl_dbg(ctrl, + "Data Link Layer Link Active not set in 1000 msec\n"); +} + +static void pcie_wait_link_training_done(struct controller *ctrl) +{ + if (!pcie_wait_link_status_bits(ctrl, PCI_EXP_LNKSTA_LT, 0)) + ctrl_dbg(ctrl, "Link Training is not done in 1000 msec\n"); } int pciehp_check_link_status(struct controller *ctrl) @@ -275,10 +296,11 @@ int pciehp_check_link_status(struct cont * hot-plug capable downstream port. But old controller might * not implement it. In this case, we wait for 1000 ms. */ - if (ctrl->link_active_reporting) - pcie_wait_link_active(ctrl); - else - msleep(1000); + if (ctrl->link_active_reporting) { + pcie_wait_link_active(ctrl); + pcie_wait_link_training_done(ctrl); + } else + msleep(1000); retval = pciehp_readw(ctrl, PCI_EXP_LNKSTA, &lnk_status); if (retval) { -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html