In the current implementation, the PCI capability list is parsed from the beginning to find each capability, which results in a large number of redundant PCI reads. Instead, we can parse the complete list just once, store it in the pci_dev structure, and get the offset of each capability directly from the pci_dev structure. This implementation improves pci devices initialization time by ~2-3% (from 270ms to 261ms) in case of bare metal and 7-8% (From 201ms to 184ms) in case of VM running on ESXi. It also adds a memory overhead of 20bytes (value of PCI_CAP_ID_MAX) per PCI device. Ran pahole for pci_dev structure. This patch is not adding any padding bytes. Signed-off-by: Vikash Bansal <bvikas@xxxxxxxxxx> --- Changes in v2: - Ran pahole tool. - Modified comments to add "clock time". - Removed comments before call to pci_find_all_capabilities. drivers/pci/pci.c | 43 ++++++++++++++++++++++++++++++++++++------- drivers/pci/probe.c | 1 + include/linux/pci.h | 2 ++ 3 files changed, 39 insertions(+), 7 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 9ecce435fb3f..b361788bcc27 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -468,6 +468,41 @@ static u8 __pci_bus_find_cap_start(struct pci_bus *bus, return 0; } + +/** + * pci_find_all_capabilities - Read all capabilities + * @dev: the PCI device + * + * Read all capabilities and store offsets in cap_off + * array in pci_dev structure. + */ +void pci_find_all_capabilities(struct pci_dev *dev) +{ + int ttl = PCI_FIND_CAP_TTL; + u16 ent; + u8 pos; + u8 id; + + pos = __pci_bus_find_cap_start(dev->bus, dev->devfn, dev->hdr_type); + if (!pos) + return; + pci_bus_read_config_byte(dev->bus, dev->devfn, pos, &pos); + while (ttl--) { + if (pos < 0x40) + break; + pos &= ~3; + pci_bus_read_config_word(dev->bus, dev->devfn, pos, &ent); + id = ent & 0xff; + if (id == 0xff) + break; + + /* Read first instance of capability */ + if (!(dev->cap_off[id])) + dev->cap_off[id] = pos; + pos = (ent >> 8); + } +} + /** * pci_find_capability - query for devices' capabilities * @dev: PCI device to query @@ -489,13 +524,7 @@ static u8 __pci_bus_find_cap_start(struct pci_bus *bus, */ u8 pci_find_capability(struct pci_dev *dev, int cap) { - u8 pos; - - pos = __pci_bus_find_cap_start(dev->bus, dev->devfn, dev->hdr_type); - if (pos) - pos = __pci_find_next_cap(dev->bus, dev->devfn, pos, cap); - - return pos; + return dev->cap_off[cap]; } EXPORT_SYMBOL(pci_find_capability); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 17a969942d37..b2fa5b2c42f6 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1831,6 +1831,7 @@ int pci_setup_device(struct pci_dev *dev) dev->hdr_type = hdr_type & 0x7f; dev->multifunction = !!(hdr_type & 0x80); dev->error_state = pci_channel_io_normal; + pci_find_all_capabilities(dev); set_pcie_port_type(dev); pci_set_of_node(dev); diff --git a/include/linux/pci.h b/include/linux/pci.h index 8253a5413d7c..abcf7fdc4c98 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -335,6 +335,7 @@ struct pci_dev { unsigned int class; /* 3 bytes: (base,sub,prog-if) */ u8 revision; /* PCI revision, low byte of class word */ u8 hdr_type; /* PCI header type (`multi' flag masked out) */ + u8 cap_off[PCI_CAP_ID_MAX]; /* Offsets of all pci capabilities */ #ifdef CONFIG_PCIEAER u16 aer_cap; /* AER capability offset */ struct aer_stats *aer_stats; /* AER stats for this device */ @@ -1140,6 +1141,7 @@ void pci_sort_breadthfirst(void); u8 pci_bus_find_capability(struct pci_bus *bus, unsigned int devfn, int cap); u8 pci_find_capability(struct pci_dev *dev, int cap); +void pci_find_all_capabilities(struct pci_dev *dev); u8 pci_find_next_capability(struct pci_dev *dev, u8 pos, int cap); u8 pci_find_ht_capability(struct pci_dev *dev, int ht_cap); u8 pci_find_next_ht_capability(struct pci_dev *dev, u8 pos, int ht_cap); -- 2.30.0