On Wed, Oct 14, 2020 at 5:39 PM Ian Kumlien <ian.kumlien@xxxxxxxxx> wrote: > > On Wed, Oct 14, 2020 at 4:36 PM Bjorn Helgaas <helgaas@xxxxxxxxxx> wrote: > > > > On Wed, Oct 14, 2020 at 03:33:17PM +0200, Ian Kumlien wrote: > > > > > I'm actually starting to think that reporting what we do with the > > > latency bit could > > > be beneficial - i.e. report which links have their L1 disabled due to > > > which device... > > > > > > I also think that this could benefit debugging - I have no clue of how > > > to read the lspci:s - I mean i do see some differences that might be > > > the fix but nothing really specific without a proper message in > > > dmesg.... > > > > Yeah, might be worth doing. Probably pci_dbg() since I think it would > > be too chatty to be enabled all the time. > > OK, will look in to it =) Just did some very basic hack, since i think it's much better to get the information in dmesg than have to boot will full debug. I assume that there is a niftier way to do it - but i wanted some feedback basically... My current output is: dmesg |grep latency [ 0.817872] pci 0000:04:00.0: ASPM latency exceeded, disabling: L1:0000:01:00.0-0000:00:01.2 diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 253c30cc1967..5a5146f47aae 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -434,7 +434,8 @@ static void pcie_get_aspm_reg(struct pci_dev *pdev, static void pcie_aspm_check_latency(struct pci_dev *endpoint) { - u32 latency, l1_switch_latency = 0; + u32 latency, l1_max_latency = 0, l1_switch_latency = 0; + bool aspm_disable = 0; struct aspm_latency *acceptable; struct pcie_link_state *link; @@ -446,6 +447,16 @@ static void pcie_aspm_check_latency(struct pci_dev *endpoint) link = endpoint->bus->self->link_state; acceptable = &link->acceptable[PCI_FUNC(endpoint->devfn)]; +#define aspm_info(device, type, down, up) \ + if (!aspm_disable) \ + { \ + pr_cont("pci %s: ASPM latency exceeded, disabling: %s:%s-%s", \ + pci_name(device), type, pci_name(down), pci_name(up)); \ + aspm_disable = 1; \ + } \ + else \ + pr_cont(", %s:%s-%s", type, pci_name(down), pci_name(up)); + while (link) { /* Check upstream direction L0s latency */ if ((link->aspm_capable & ASPM_STATE_L0S_UP) && @@ -456,10 +467,14 @@ static void pcie_aspm_check_latency(struct pci_dev *endpoint) if ((link->aspm_capable & ASPM_STATE_L0S_DW) && (link->latency_dw.l0s > acceptable->l0s)) link->aspm_capable &= ~ASPM_STATE_L0S_DW; + /* * Check L1 latency. - * Every switch on the path to root complex need 1 - * more microsecond for L1. Spec doesn't mention L0s. + * + * PCIe r5.0, sec 5.4.1.2.2 states: + * A Switch is required to initiate an L1 exit transition on its + * Upstream Port Link after no more than 1 μs from the beginning of an + * L1 exit transition on any of its Downstream Port Links. * * The exit latencies for L1 substates are not advertised * by a device. Since the spec also doesn't mention a way @@ -469,14 +484,22 @@ static void pcie_aspm_check_latency(struct pci_dev *endpoint) * L1 exit latencies advertised by a device include L1 * substate latencies (and hence do not do any check). */ - latency = max_t(u32, link->latency_up.l1, link->latency_dw.l1); - if ((link->aspm_capable & ASPM_STATE_L1) && - (latency + l1_switch_latency > acceptable->l1)) - link->aspm_capable &= ~ASPM_STATE_L1; - l1_switch_latency += 1000; + if (link->aspm_capable & ASPM_STATE_L1) { + latency = max_t(u32, link->latency_up.l1, link->latency_dw.l1); + l1_max_latency = max_t(u32, latency, l1_max_latency); + if (l1_max_latency + l1_switch_latency > acceptable->l1) + { + aspm_info(endpoint, "L1", link->downstream, link->pdev); + link->aspm_capable &= ~ASPM_STATE_L1; + } + l1_switch_latency += 1000; + } link = link->parent; } + if (aspm_disable) + pr_cont("\n"); +#undef aspm_info } for L1 and L0s, which we haven't discussed that much yet: diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 5a5146f47aae..b01d393e742d 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -434,7 +434,8 @@ static void pcie_get_aspm_reg(struct pci_dev *pdev, static void pcie_aspm_check_latency(struct pci_dev *endpoint) { - u32 latency, l1_max_latency = 0, l1_switch_latency = 0; + u32 latency, l1_max_latency = 0, l1_switch_latency = 0, + l0s_latency_up = 0, l0s_latency_dw = 0; bool aspm_disable = 0; struct aspm_latency *acceptable; struct pcie_link_state *link; @@ -459,14 +460,24 @@ static void pcie_aspm_check_latency(struct pci_dev *endpoint) while (link) { /* Check upstream direction L0s latency */ - if ((link->aspm_capable & ASPM_STATE_L0S_UP) && - (link->latency_up.l0s > acceptable->l0s)) - link->aspm_capable &= ~ASPM_STATE_L0S_UP; + if (link->aspm_capable & ASPM_STATE_L0S_UP) { + l0s_latency_up += link->latency_up.l0s; + if (l0s_latency_up > acceptable->l0s) + { + aspm_info(endpoint, "L0s-up", link->downstream, link->pdev); + link->aspm_capable &= ~ASPM_STATE_L0S_UP; + } + } /* Check downstream direction L0s latency */ - if ((link->aspm_capable & ASPM_STATE_L0S_DW) && - (link->latency_dw.l0s > acceptable->l0s)) - link->aspm_capable &= ~ASPM_STATE_L0S_DW; + if (link->aspm_capable & ASPM_STATE_L0S_DW) { + l0s_latency_dw += link->latency_dw.l0s; + if (l0s_latency_dw > acceptable->l0s) + { + aspm_info(endpoint, "L0s-dw", link->downstream, link->pdev); + link->aspm_capable &= ~ASPM_STATE_L0S_DW; + } + } /* * Check L1 latency.