pci_call_probe() uses work_on_cpu(), which creates and tears down a full kthread on each call, to invoke ->probe() on node local CPU for allocation affinity. The same goal can easily be achieved using a work item. This patch rewrites pci_call_probe() so that it uses a work item instead of work_on_cpu(). Note that the function is restructured for simplicity. This adds get/put_online_cpus() pair for devices without node but the overhead of doing so isn't anything material at this level. Signed-off-by: Tejun Heo <tj@xxxxxxxxxx> --- I tested both paths but had to simulate device on foreign node. Thanks. drivers/pci/pci-driver.c | 48 +++++++++++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 5270f1a..554c15a 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -274,11 +274,14 @@ struct drv_dev_and_id { struct pci_driver *drv; struct pci_dev *dev; const struct pci_device_id *id; + struct work_struct work; + int error; }; -static long local_pci_probe(void *_ddi) +static void pci_probe_fn(struct work_struct *work) { - struct drv_dev_and_id *ddi = _ddi; + struct drv_dev_and_id *ddi = container_of(work, struct drv_dev_and_id, + work); struct device *dev = &ddi->dev->dev; int rc; @@ -298,33 +301,38 @@ static long local_pci_probe(void *_ddi) pm_runtime_set_suspended(dev); pm_runtime_put_noidle(dev); } - return rc; + + ddi->error = rc; } static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev, const struct pci_device_id *id) { - int error, node; + int node; + int cpu = nr_cpu_ids; struct drv_dev_and_id ddi = { drv, dev, id }; - /* Execute driver initialization on node where the device's - bus is attached to. This way the driver likely allocates - its local memory on the right node without any need to - change it. */ - node = dev_to_node(&dev->dev); - if (node >= 0) { - int cpu; + /* + * Execute driver initialization on node where the device's bus is + * attached to. This way the driver likely allocates its local + * memory on the right node without any need to change it. + */ + get_online_cpus(); - get_online_cpus(); + node = dev_to_node(&dev->dev); + if (node >= 0) cpu = cpumask_any_and(cpumask_of_node(node), cpu_online_mask); - if (cpu < nr_cpu_ids) - error = work_on_cpu(cpu, local_pci_probe, &ddi); - else - error = local_pci_probe(&ddi); - put_online_cpus(); - } else - error = local_pci_probe(&ddi); - return error; + + if (cpu < nr_cpu_ids) { + INIT_WORK_ONSTACK(&ddi.work, pci_probe_fn); + schedule_work_on(cpu, &ddi.work); + flush_work(&ddi.work); + } else { + pci_probe_fn(&ddi.work); + } + + put_online_cpus(); + return ddi.error; } /** -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html