On Fri, Oct 18, 2024 at 01:47:28PM +0800, Guixin Liu wrote: > PCI driver will traverse pci device list in pci_seq_start in every > sequential file reading, use xarry to store all pci devices to > accelerate finding the start. > > Use "time cat /proc/bus/pci/devices" to test on a machine with 13k > pci devices, get an increase of about 90%. s/pci/PCI/ (several times) s/pci_seq_start/pci_seq_start()/ s/xarry/xarray/ s/, /, / (remove extra space) > Without this patch: > real 0m0.917s > user 0m0.000s > sys 0m0.913s > With this patch: > real 0m0.093s > user 0m0.000s > sys 0m0.093s Nice speedup, for sure! > Signed-off-by: Guixin Liu <kanie@xxxxxxxxxxxxxxxxx> > --- > drivers/pci/pci.h | 3 +++ > drivers/pci/probe.c | 1 + > drivers/pci/proc.c | 64 +++++++++++++++++++++++++++++++++++++++----- > drivers/pci/remove.c | 1 + > include/linux/pci.h | 2 ++ > 5 files changed, 64 insertions(+), 7 deletions(-) > > diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h > index 14d00ce45bfa..1a7da91eeb80 100644 > --- a/drivers/pci/pci.h > +++ b/drivers/pci/pci.h > @@ -962,4 +962,7 @@ void pcim_release_region(struct pci_dev *pdev, int bar); > (PCI_CONF1_ADDRESS(bus, dev, func, reg) | \ > PCI_CONF1_EXT_REG(reg)) > > +void pci_seq_tree_add_dev(struct pci_dev *dev); > +void pci_seq_tree_remove_dev(struct pci_dev *dev); > + > #endif /* DRIVERS_PCI_H */ > diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c > index 4f68414c3086..1fd9e9022f70 100644 > --- a/drivers/pci/probe.c > +++ b/drivers/pci/probe.c > @@ -2592,6 +2592,7 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus *bus) > WARN_ON(ret < 0); > > pci_npem_create(dev); > + pci_seq_tree_add_dev(dev); > } > > struct pci_dev *pci_scan_single_device(struct pci_bus *bus, int devfn) > diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c > index f967709082d6..30ca071ccad5 100644 > --- a/drivers/pci/proc.c > +++ b/drivers/pci/proc.c > @@ -19,6 +19,9 @@ > > static int proc_initialized; /* = 0 */ > > +DEFINE_XARRAY_FLAGS(pci_seq_tree, 0); > +static unsigned long pci_max_idx; > + > static loff_t proc_bus_pci_lseek(struct file *file, loff_t off, int whence) > { > struct pci_dev *dev = pde_data(file_inode(file)); > @@ -334,25 +337,72 @@ static const struct proc_ops proc_bus_pci_ops = { > #endif /* HAVE_PCI_MMAP */ > }; > > +void pci_seq_tree_add_dev(struct pci_dev *dev) > +{ > + int ret; > + > + if (dev) { I don't think we should test "dev" for NULL here. If it's NULL, I think we have bigger problems and we should oops. > + xa_lock(&pci_seq_tree); > + pci_dev_get(dev); > + ret = __xa_insert(&pci_seq_tree, pci_max_idx, dev, GFP_KERNEL); > + if (!ret) { > + dev->proc_seq_idx = pci_max_idx; > + pci_max_idx++; > + } else { > + pci_dev_put(dev); > + WARN_ON(ret); > + } > + xa_unlock(&pci_seq_tree); > + } > +} > + > +void pci_seq_tree_remove_dev(struct pci_dev *dev) > +{ > + unsigned long idx = dev->proc_seq_idx; > + struct pci_dev *latest_dev = NULL; > + struct pci_dev *ret; > + > + if (!dev) > + return; Same comment about testing "dev" for NULL. > + xa_lock(&pci_seq_tree); > + __xa_erase(&pci_seq_tree, idx); > + pci_dev_put(dev); > + /* > + * Move the latest pci_dev to this idx to keep the continuity. > + */ > + if (idx != pci_max_idx - 1) { > + latest_dev = __xa_erase(&pci_seq_tree, pci_max_idx - 1); > + ret = __xa_cmpxchg(&pci_seq_tree, idx, NULL, latest_dev, > + GFP_KERNEL); > + if (!ret) > + latest_dev->proc_seq_idx = idx; > + WARN_ON(ret); > + } > + pci_max_idx--; > + xa_unlock(&pci_seq_tree); > +} > + > /* iterator */ > static void *pci_seq_start(struct seq_file *m, loff_t *pos) > { > - struct pci_dev *dev = NULL; > + struct pci_dev *dev; > loff_t n = *pos; > > - for_each_pci_dev(dev) { > - if (!n--) > - break; > - } > + dev = xa_load(&pci_seq_tree, n); > + if (dev) > + pci_dev_get(dev); > return dev; I'm a little hesitant to add another place that keeps track of every PCI device. It's a fair bit of code here, and it's redundant information, which means more work to keep them all synchronized. This proc interface feels inherently racy. We keep track of the current item (n) in a struct seq_file, but I don't think there's anything to prevent a pci_dev hot-add or -remove between calls to pci_seq_start(). Is the proc interface the only place to get this information? If there's a way to get it from sysfs, maybe that is better and we don't need to spend effort optimizing the less-desirable path? > } > > static void *pci_seq_next(struct seq_file *m, void *v, loff_t *pos) > { > - struct pci_dev *dev = v; > + struct pci_dev *dev; > > (*pos)++; > - dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev); > + dev = xa_load(&pci_seq_tree, *pos); > + if (dev) > + pci_dev_get(dev); Where is the pci_dev_put() that corresponds with this new pci_dev_get()? > return dev; > } > > diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c > index e4ce1145aa3e..257ea46233a3 100644 > --- a/drivers/pci/remove.c > +++ b/drivers/pci/remove.c > @@ -53,6 +53,7 @@ static void pci_destroy_dev(struct pci_dev *dev) > pci_npem_remove(dev); > > device_del(&dev->dev); > + pci_seq_tree_remove_dev(dev); > > down_write(&pci_bus_sem); > list_del(&dev->bus_list); > diff --git a/include/linux/pci.h b/include/linux/pci.h > index 573b4c4c2be6..aeb3d4cce06a 100644 > --- a/include/linux/pci.h > +++ b/include/linux/pci.h > @@ -534,6 +534,8 @@ struct pci_dev { > > /* These methods index pci_reset_fn_methods[] */ > u8 reset_methods[PCI_NUM_RESET_METHODS]; /* In priority order */ > + > + unsigned long long proc_seq_idx; > }; > > static inline struct pci_dev *pci_physfn(struct pci_dev *dev) > -- > 2.43.0 >