On 23/02/22, 5:20 PM, "Ajay Kaher" <akaher@xxxxxxxxxx> wrote: > From: Vikash Bansal <bvikas@xxxxxxxxxx> Bjorn, as per your guidance, we sent v3 (we hope it meets your expectations). Please let us know if you are looking for further improvements or want us to resend this patch. -Ajay > In the current implementation, the PCI capability list is parsed from > the beginning to find each capability, which results in a large number > of redundant PCI reads. > > Instead, we can parse the complete list just once, store it in the > pci_dev structure, and get the offset of each capability directly from > the pci_dev structure. > > This implementation improves pci devices initialization time by ~2-3% > (from 270ms to 261ms) in case of bare metal and 7-8% (From 201ms to 184ms) > in case of VM running on ESXi. > > It also adds a memory overhead of 20bytes (value of PCI_CAP_ID_MAX) per > PCI device. > > Ran pahole for pci_dev structure. This patch is not adding any padding > bytes. > > Signed-off-by: Vikash Bansal <bvikas@xxxxxxxxxx> > Signed-off-by: Ajay Kaher <akaher@xxxxxxxxxx> > > --- > > Changes in v3: > - Added check before access cap_off from pci_find_capability(). > - Moved declaration of pci_find_all_capabilities() to drivers/pci/pci.h. > > Changes in v2: > - Ran pahole tool. > - Modified comments to add "clock time". > - Removed comments before call to pci_find_all_capabilities. 
> > --- > drivers/pci/pci.c | 44 +++++++++++++++++++++++++++++++++++++------- > drivers/pci/pci.h | 1 + > drivers/pci/probe.c | 1 + > include/linux/pci.h | 1 + > 4 files changed, 40 insertions(+), 7 deletions(-) > > diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c > index 3d2fb39..cf54811 100644 > --- a/drivers/pci/pci.c > +++ b/drivers/pci/pci.c > @@ -469,6 +469,40 @@ static u8 __pci_bus_find_cap_start(struct pci_bus *bus, > } > > /** > + * pci_find_all_capabilities - Read all capabilities > + * @dev: the PCI device > + * > + * Read all capabilities and store offsets in cap_off > + * array in pci_dev structure. > + */ > +void pci_find_all_capabilities(struct pci_dev *dev) > +{ > + int ttl = PCI_FIND_CAP_TTL; > + u16 ent; > + u8 pos; > + u8 id; > + > + pos = __pci_bus_find_cap_start(dev->bus, dev->devfn, dev->hdr_type); > + if (!pos) > + return; > + pci_bus_read_config_byte(dev->bus, dev->devfn, pos, &pos); > + while (ttl--) { > + if (pos < 0x40) > + break; > + pos &= ~3; > + pci_bus_read_config_word(dev->bus, dev->devfn, pos, &ent); > + id = ent & 0xff; > + if (id == 0xff) > + break; > + > + /* Read first instance of capability */ > + if (!(dev->cap_off[id])) > + dev->cap_off[id] = pos; > + pos = (ent >> 8); > + } > +} > + > +/** > * pci_find_capability - query for devices' capabilities > * @dev: PCI device to query > * @cap: capability code > @@ -489,13 +523,9 @@ static u8 __pci_bus_find_cap_start(struct pci_bus *bus, > */ > u8 pci_find_capability(struct pci_dev *dev, int cap) > { > - u8 pos; > - > - pos = __pci_bus_find_cap_start(dev->bus, dev->devfn, dev->hdr_type); > - if (pos) > - pos = __pci_find_next_cap(dev->bus, dev->devfn, pos, cap); > - > - return pos; > + if(cap >= PCI_CAP_ID_MAX) > + return 0; > + return dev->cap_off[cap]; > } > EXPORT_SYMBOL(pci_find_capability); > > diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h > index 3d60cab..3cb70d5 100644 > --- a/drivers/pci/pci.h > +++ b/drivers/pci/pci.h > @@ -87,6 +87,7 @@ bool 
pci_bridge_d3_possible(struct pci_dev *dev); > void pci_bridge_d3_update(struct pci_dev *dev); > void pci_bridge_wait_for_secondary_bus(struct pci_dev *dev); > void pci_bridge_reconfigure_ltr(struct pci_dev *dev); > +void pci_find_all_capabilities(struct pci_dev *dev); > > static inline void pci_wakeup_event(struct pci_dev *dev) > { > diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c > index 087d365..d75e1fa 100644 > --- a/drivers/pci/probe.c > +++ b/drivers/pci/probe.c > @@ -1839,6 +1839,7 @@ int pci_setup_device(struct pci_dev *dev) > dev->hdr_type = hdr_type & 0x7f; > dev->multifunction = !!(hdr_type & 0x80); > dev->error_state = pci_channel_io_normal; > + pci_find_all_capabilities(dev); > set_pcie_port_type(dev); > > pci_set_of_node(dev); > diff --git a/include/linux/pci.h b/include/linux/pci.h > index 18a75c8..bd61cee 100644 > --- a/include/linux/pci.h > +++ b/include/linux/pci.h > @@ -326,6 +326,7 @@ struct pci_dev { > unsigned int class; /* 3 bytes: (base,sub,prog-if) */ > u8 revision; /* PCI revision, low byte of class word */ > u8 hdr_type; /* PCI header type (`multi' flag masked out) */ > + u8 cap_off[PCI_CAP_ID_MAX]; /* Offsets of all pci capabilities */ > #ifdef CONFIG_PCIEAER > u16 aer_cap; /* AER capability offset */ > struct aer_stats *aer_stats; /* AER stats for this device */ > -- > 2.7.4