FYI, This patch's hotplug code hasn't been tested. Jordan, could you please verify it for me? Thanks, Jon On Mon, Jun 13, 2011 at 04:34:56PM -0500, Jon Mason wrote: > There is a sizable performance boost for having the largest possible > maximum payload size on each PCI-E device. However, the maximum payload > size must be uniform on a given PCI-E fabric, and each device, bridge, > and root port can have a different max size. To find and configure the > optimal MPS settings, one must walk the fabric and determine the largest > MPS available on all the devices on the given fabric. > > This also works around an issue with buggy BIOS revisions found on > various whitebox motherboards which do not configure MPS beyond one > level below the root port. > > For PCIE hotplug enabled slots not connected directly to a PCI-E root > port, there can be problems when hotplugging devices. This is due to > the possibility of hotplugging a device into the fabric with a smaller > MPS that the devices currently running have configured. Modifying the > MPS on the running devices could cause a fatal bus error due to an > incoming frame being larger than the newly configured MPS. To work > around this, the MPS for the entire fabric must be set to the minimum > size. Any devices hotplugged into this fabric will have the minimum MPS > set. If the PCI hotplug slot is directly connected to the root port and > there are not other devices on the fabric (which seems to be the most > common case), then this is not an issue and MPS discovery will occur as > normal. > > Signed-off-by: Jon Mason <mason@xxxxxxxx> > --- > drivers/pci/hotplug/pcihp_slot.c | 58 +++++---------------- > drivers/pci/probe.c | 106 ++++++++++++++++++++++++++++++++++++++ > include/linux/pci.h | 3 +- > 3 files changed, 122 insertions(+), 45 deletions(-) > > diff --git a/drivers/pci/hotplug/pcihp_slot.c b/drivers/pci/hotplug/pcihp_slot.c > index 749fdf0..ee4ff14 100644 > --- a/drivers/pci/hotplug/pcihp_slot.c > +++ b/drivers/pci/hotplug/pcihp_slot.c > @@ -26,6 +26,19 @@ > #include <linux/pci.h> > #include <linux/pci_hotplug.h> > > +extern void pcie_set_maxpayloadsize(struct pci_bus *bus); > + > +static void pcie_determine_mps(struct pci_dev *dev) > +{ > + struct pci_bus *bus = dev->bus; > + > + while (bus->self->pcie_type != PCI_EXP_TYPE_ROOT_PORT && > + !pci_is_root_bus(bus)) > + bus = bus->parent; > + > + pcie_set_maxpayloadsize(bus); > +} > + > static struct hpp_type0 pci_default_type0 = { > .revision = 1, > .cache_line_size = 8, > @@ -158,47 +171,6 @@ static void program_hpp_type2(struct pci_dev *dev, struct hpp_type2 *hpp) > */ > } > > -/* Program PCIE MaxPayload setting on device: ensure parent maxpayload <= device */ > -static int pci_set_payload(struct pci_dev *dev) > -{ > - int pos, ppos; > - u16 pctl, psz; > - u16 dctl, dsz, dcap, dmax; > - struct pci_dev *parent; > - > - parent = dev->bus->self; > - pos = pci_find_capability(dev, PCI_CAP_ID_EXP); > - if (!pos) > - return 0; > - > - /* Read Device MaxPayload capability and setting */ > - pci_read_config_word(dev, pos + PCI_EXP_DEVCTL, &dctl); > - pci_read_config_word(dev, pos + PCI_EXP_DEVCAP, &dcap); > - dsz = (dctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5; > - dmax = (dcap & PCI_EXP_DEVCAP_PAYLOAD); > - > - /* Read Parent MaxPayload setting */ > - ppos = pci_find_capability(parent, PCI_CAP_ID_EXP); > - if (!ppos) > - return 0; > - pci_read_config_word(parent, ppos + PCI_EXP_DEVCTL, &pctl); > - psz = (pctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5; > - > - /* If parent payload > device max payload -> error > - * If parent payload > device payload -> set speed > - * If parent payload <= device payload -> do nothing > - */ > - if (psz > dmax) > - return -1; > - else if (psz > dsz) { > - dev_info(&dev->dev, "Setting MaxPayload to %d\n", 128 << psz); > - pci_write_config_word(dev, pos + PCI_EXP_DEVCTL, > - (dctl & ~PCI_EXP_DEVCTL_PAYLOAD) + > - (psz << 5)); > - } > - return 0; > -} > - > void pci_configure_slot(struct pci_dev *dev) > { > struct pci_dev *cdev; > @@ -210,9 +182,7 @@ void pci_configure_slot(struct pci_dev *dev) > (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI))) > return; > > - ret = pci_set_payload(dev); > - if (ret) > - dev_warn(&dev->dev, "could not set device max payload\n"); > + pcie_determine_mps(dev); > > memset(&hpp, 0, sizeof(hpp)); > ret = pci_get_hp_params(dev, &hpp); > diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c > index 48849ff..0986f0a 100644 > --- a/drivers/pci/probe.c > +++ b/drivers/pci/probe.c > @@ -860,6 +860,8 @@ void set_pcie_port_type(struct pci_dev *pdev) > pdev->pcie_cap = pos; > pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, ®16); > pdev->pcie_type = (reg16 & PCI_EXP_FLAGS_TYPE) >> 4; > + pci_read_config_word(pdev, pos + PCI_EXP_DEVCAP, ®16); > + pdev->pcie_mpss = reg16 & PCI_EXP_DEVCAP_PAYLOAD; > } > > void set_pcie_hotplug_bridge(struct pci_dev *pdev) > @@ -1327,6 +1329,105 @@ int pci_scan_slot(struct pci_bus *bus, int devfn) > return nr; > } > > +static void pcie_find_smpss(struct pci_bus *bus, u8 *smpss) > +{ > + struct pci_bus *child; > + struct pci_dev *dev; > + > + > + list_for_each_entry(dev, &bus->devices, bus_list) { > + if (!pci_is_pcie(dev)) > + continue; > + > + /* For PCIE hotplug enabled slots not connected directly to a > + * PCI-E root port, there can be problems when hotplugging > + * devices. This is due to the possibility of hotplugging a > + * device into the fabric with a smaller MPS that the devices > + * currently running have configured. Modifying the MPS on the > + * running devices could cause a fatal bus error due to an > + * incoming frame being larger than the newly configured MPS. > + * To work around this, the MPS for the entire fabric must be > + * set to the minimum size. Any devices hotpulled into this > + * fabric will have the minimum MPS set. If the PCI hotplug > + * slot is directly connected to the root port and there are not > + * other devices on the fabric (which seems to be the most > + * common case), then this is not an issue and MPS discovery > + * will occur as normal. > + */ > + if (dev->is_hotplug_bridge) > + smpss = 0; > + > + if (*smpss > dev->pcie_mpss) > + *smpss = dev->pcie_mpss; > + } > + > + list_for_each_entry(child, &bus->children, node) { > + if (!pci_is_pcie(child->self)) > + continue; > + > + pcie_find_smpss(child, smpss); > + } > +} > + > +static void pcie_write_mps(struct pci_dev *dev, u8 smpss) > +{ > + u16 dctl, mps; > + > + if (!pci_is_pcie(dev)) > + return; > + > + pci_read_config_word(dev, dev->pcie_cap + PCI_EXP_DEVCTL, &dctl); > + mps = (dctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5; > + > + dev_dbg(&dev->dev, "Dev MPS %x MPSS %x\n", mps, dev->pcie_mpss); > + > + if (smpss > mps) { > + dev_info(&dev->dev, "Setting MaxPayload to %d\n", 128 << smpss); > + > + pci_write_config_word(dev, dev->pcie_cap + PCI_EXP_DEVCTL, > + (dctl & ~PCI_EXP_DEVCTL_PAYLOAD) | > + (smpss << 5)); > + } > + > + dev->pcie_mpss = smpss; > +} > + > +static void pcie_write_smps(struct pci_bus *bus, u8 smpss) > +{ > + struct pci_bus *child; > + struct pci_dev *dev; > + > + list_for_each_entry(dev, &bus->devices, bus_list) > + pcie_write_mps(dev, smpss); > + > + list_for_each_entry(child, &bus->children, node) { > + if (!pci_is_pcie(child->self)) > + continue; > + > + pcie_write_smps(child, smpss); > + } > +} > + > +void pcie_set_maxpayloadsize(struct pci_bus *bus) > +{ > + u8 smpss; > + > + if (!bus->self) > + return; > + > + if (!pci_is_pcie(bus->self)) > + return; > + > + if (bus->self->pcie_type != PCI_EXP_TYPE_ROOT_PORT) > + return; > + > + smpss = bus->self->pcie_mpss; > + pcie_find_smpss(bus, &smpss); > + > + pcie_write_mps(bus->self, smpss); > + pcie_write_smps(bus, smpss); > +} > + > unsigned int __devinit pci_scan_child_bus(struct pci_bus *bus) > { > unsigned int devfn, pass, max = bus->secondary; > @@ -1359,6 +1460,11 @@ unsigned int __devinit pci_scan_child_bus(struct pci_bus *bus) > max = pci_scan_bridge(bus, dev, max, pass); > } > > + /* After the bus has been walked and all devices discovered, determine > + * the MPS of the fabric and set it. > + */ > + pcie_set_maxpayloadsize(bus); > + > /* > * We've scanned the bus and so we know all about what's on > * the other side of any bridges that may be on this bus plus > diff --git a/include/linux/pci.h b/include/linux/pci.h > index c446b5c..ee8451e 100644 > --- a/include/linux/pci.h > +++ b/include/linux/pci.h > @@ -251,7 +251,8 @@ struct pci_dev { > u8 revision; /* PCI revision, low byte of class word */ > u8 hdr_type; /* PCI header type (`multi' flag masked out) */ > u8 pcie_cap; /* PCI-E capability offset */ > - u8 pcie_type; /* PCI-E device/port type */ > + u8 pcie_type:4; /* PCI-E device/port type */ > + u8 pcie_mpss:3; /* PCI-E Max Payload Size Supported */ > u8 rom_base_reg; /* which config register controls the ROM */ > u8 pin; /* which interrupt pin this device uses */ > > -- > 1.7.5.2 > -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html