There is a sizable performance boost for having the largest possible maximum payload size on each PCI-E device. However, the maximum payload size must be uniform on a given PCI-E fabric, and each device, bridge, and root port can have a different max size. To find and configure the optimal MPS settings, one must walk the fabric and determine the largest MPS available on all the devices on the given fabric. This also works around an issue with buggy BIOS revisions found on various whitebox motherboards which do not configure MPS beyond one level below the root port. For PCIE hotplug enabled slots not connected directly to a PCI-E root port, there can be problems when hotplugging devices. This is due to the possibility of hotplugging a device into the fabric with a smaller MPS that the devices currently running have configured. Modifying the MPS on the running devices could cause a fatal bus error due to an incoming frame being larger than the newly configured MPS. To work around this, the MPS for the entire fabric must be set to the minimum size. Any devices hotplugged into this fabric will have the minimum MPS set. If the PCI hotplug slot is directly connected to the root port and there are not other devices on the fabric (which seems to be the most common case), then this is not an issue and MPS discovery will occur as normal. Signed-off-by: Jon Mason <mason@xxxxxxxx> --- drivers/pci/hotplug/pcihp_slot.c | 58 +++++---------------- drivers/pci/probe.c | 106 ++++++++++++++++++++++++++++++++++++++ include/linux/pci.h | 3 +- 3 files changed, 122 insertions(+), 45 deletions(-) diff --git a/drivers/pci/hotplug/pcihp_slot.c b/drivers/pci/hotplug/pcihp_slot.c index 749fdf0..ee4ff14 100644 --- a/drivers/pci/hotplug/pcihp_slot.c +++ b/drivers/pci/hotplug/pcihp_slot.c @@ -26,6 +26,19 @@ #include <linux/pci.h> #include <linux/pci_hotplug.h> +extern void pcie_set_maxpayloadsize(struct pci_bus *bus); + +static void pcie_determine_mps(struct pci_dev *dev) +{ + struct pci_bus *bus = dev->bus; + + while (bus->self->pcie_type != PCI_EXP_TYPE_ROOT_PORT && + !pci_is_root_bus(bus)) + bus = bus->parent; + + pcie_set_maxpayloadsize(bus); +} + static struct hpp_type0 pci_default_type0 = { .revision = 1, .cache_line_size = 8, @@ -158,47 +171,6 @@ static void program_hpp_type2(struct pci_dev *dev, struct hpp_type2 *hpp) */ } -/* Program PCIE MaxPayload setting on device: ensure parent maxpayload <= device */ -static int pci_set_payload(struct pci_dev *dev) -{ - int pos, ppos; - u16 pctl, psz; - u16 dctl, dsz, dcap, dmax; - struct pci_dev *parent; - - parent = dev->bus->self; - pos = pci_find_capability(dev, PCI_CAP_ID_EXP); - if (!pos) - return 0; - - /* Read Device MaxPayload capability and setting */ - pci_read_config_word(dev, pos + PCI_EXP_DEVCTL, &dctl); - pci_read_config_word(dev, pos + PCI_EXP_DEVCAP, &dcap); - dsz = (dctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5; - dmax = (dcap & PCI_EXP_DEVCAP_PAYLOAD); - - /* Read Parent MaxPayload setting */ - ppos = pci_find_capability(parent, PCI_CAP_ID_EXP); - if (!ppos) - return 0; - pci_read_config_word(parent, ppos + PCI_EXP_DEVCTL, &pctl); - psz = (pctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5; - - /* If parent payload > device max payload -> error - * If parent payload > device payload -> set speed - * If parent payload <= device payload -> do nothing - */ - if (psz > dmax) - return -1; - else if (psz > dsz) { - dev_info(&dev->dev, "Setting MaxPayload to %d\n", 128 << psz); - pci_write_config_word(dev, pos + PCI_EXP_DEVCTL, - (dctl & ~PCI_EXP_DEVCTL_PAYLOAD) + - (psz << 5)); - } - return 0; -} - void pci_configure_slot(struct pci_dev *dev) { struct pci_dev *cdev; @@ -210,9 +182,7 @@ void pci_configure_slot(struct pci_dev *dev) (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI))) return; - ret = pci_set_payload(dev); - if (ret) - dev_warn(&dev->dev, "could not set device max payload\n"); + pcie_determine_mps(dev); memset(&hpp, 0, sizeof(hpp)); ret = pci_get_hp_params(dev, &hpp); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 48849ff..0986f0a 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -860,6 +860,8 @@ void set_pcie_port_type(struct pci_dev *pdev) pdev->pcie_cap = pos; pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, ®16); pdev->pcie_type = (reg16 & PCI_EXP_FLAGS_TYPE) >> 4; + pci_read_config_word(pdev, pos + PCI_EXP_DEVCAP, ®16); + pdev->pcie_mpss = reg16 & PCI_EXP_DEVCAP_PAYLOAD; } void set_pcie_hotplug_bridge(struct pci_dev *pdev) @@ -1327,6 +1329,105 @@ int pci_scan_slot(struct pci_bus *bus, int devfn) return nr; } +static void pcie_find_smpss(struct pci_bus *bus, u8 *smpss) +{ + struct pci_bus *child; + struct pci_dev *dev; + + + list_for_each_entry(dev, &bus->devices, bus_list) { + if (!pci_is_pcie(dev)) + continue; + + /* For PCIE hotplug enabled slots not connected directly to a + * PCI-E root port, there can be problems when hotplugging + * devices. This is due to the possibility of hotplugging a + * device into the fabric with a smaller MPS that the devices + * currently running have configured. Modifying the MPS on the + * running devices could cause a fatal bus error due to an + * incoming frame being larger than the newly configured MPS. + * To work around this, the MPS for the entire fabric must be + * set to the minimum size. Any devices hotpulled into this + * fabric will have the minimum MPS set. If the PCI hotplug + * slot is directly connected to the root port and there are not + * other devices on the fabric (which seems to be the most + * common case), then this is not an issue and MPS discovery + * will occur as normal. + */ + if (dev->is_hotplug_bridge) + smpss = 0; + + if (*smpss > dev->pcie_mpss) + *smpss = dev->pcie_mpss; + } + + list_for_each_entry(child, &bus->children, node) { + if (!pci_is_pcie(child->self)) + continue; + + pcie_find_smpss(child, smpss); + } +} + +static void pcie_write_mps(struct pci_dev *dev, u8 smpss) +{ + u16 dctl, mps; + + if (!pci_is_pcie(dev)) + return; + + pci_read_config_word(dev, dev->pcie_cap + PCI_EXP_DEVCTL, &dctl); + mps = (dctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5; + + dev_dbg(&dev->dev, "Dev MPS %x MPSS %x\n", mps, dev->pcie_mpss); + + if (smpss > mps) { + dev_info(&dev->dev, "Setting MaxPayload to %d\n", 128 << smpss); + + pci_write_config_word(dev, dev->pcie_cap + PCI_EXP_DEVCTL, + (dctl & ~PCI_EXP_DEVCTL_PAYLOAD) | + (smpss << 5)); + } + + dev->pcie_mpss = smpss; +} + +static void pcie_write_smps(struct pci_bus *bus, u8 smpss) +{ + struct pci_bus *child; + struct pci_dev *dev; + + list_for_each_entry(dev, &bus->devices, bus_list) + pcie_write_mps(dev, smpss); + + list_for_each_entry(child, &bus->children, node) { + if (!pci_is_pcie(child->self)) + continue; + + pcie_write_smps(child, smpss); + } +} + +void pcie_set_maxpayloadsize(struct pci_bus *bus) +{ + u8 smpss; + + if (!bus->self) + return; + + if (!pci_is_pcie(bus->self)) + return; + + if (bus->self->pcie_type != PCI_EXP_TYPE_ROOT_PORT) + return; + + smpss = bus->self->pcie_mpss; + pcie_find_smpss(bus, &smpss); + + pcie_write_mps(bus->self, smpss); + pcie_write_smps(bus, smpss); +} + unsigned int __devinit pci_scan_child_bus(struct pci_bus *bus) { unsigned int devfn, pass, max = bus->secondary; @@ -1359,6 +1460,11 @@ unsigned int __devinit pci_scan_child_bus(struct pci_bus *bus) max = pci_scan_bridge(bus, dev, max, pass); } + /* After the bus has been walked and all devices discovered, determine + * the MPS of the fabric and set it. + */ + pcie_set_maxpayloadsize(bus); + /* * We've scanned the bus and so we know all about what's on * the other side of any bridges that may be on this bus plus diff --git a/include/linux/pci.h b/include/linux/pci.h index c446b5c..ee8451e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -251,7 +251,8 @@ struct pci_dev { u8 revision; /* PCI revision, low byte of class word */ u8 hdr_type; /* PCI header type (`multi' flag masked out) */ u8 pcie_cap; /* PCI-E capability offset */ - u8 pcie_type; /* PCI-E device/port type */ + u8 pcie_type:4; /* PCI-E device/port type */ + u8 pcie_mpss:3; /* PCI-E Max Payload Size Supported */ u8 rom_base_reg; /* which config register controls the ROM */ u8 pin; /* which interrupt pin this device uses */ -- 1.7.5.2 -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html