Re: [PATCH v2] PCI: Assign PCI domain by ida_alloc()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Pali,

On 14/07/2022 19:41, Pali Rohár wrote:
Replace assignment of PCI domain from atomic_inc_return() to ida_alloc().

Use two IDAs, one for static domain allocations (those which are defined in
device tree) and second for dynamic allocations (all other).

During removal of root bus / host bridge release also allocated domain id.
So released id can be reused again, for example in situation when
dynamically loading and unloading native PCI host bridge drivers.

This change also allows to mix static device tree assignment and dynamic by
kernel as all static allocations are reserved in dynamic pool.

Signed-off-by: Pali Rohár <pali@xxxxxxxxxx>
---
Idea of this patch comes from the following discussion:
https://lore.kernel.org/linux-pci/20210412123936.25555-1-pali@xxxxxxxxxx/t/#u

Changes in v2:
* Fix broken compilation
---
  drivers/pci/pci.c    | 103 +++++++++++++++++++++++++------------------
  drivers/pci/probe.c  |   5 +++
  drivers/pci/remove.c |   6 +++
  include/linux/pci.h  |   1 +
  4 files changed, 72 insertions(+), 43 deletions(-)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index cfaf40a540a8..34fdcee6634a 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -6762,60 +6762,70 @@ static void pci_no_domains(void)
  }
#ifdef CONFIG_PCI_DOMAINS_GENERIC
-static atomic_t __domain_nr = ATOMIC_INIT(-1);
+static DEFINE_IDA(pci_domain_nr_static_ida);
+static DEFINE_IDA(pci_domain_nr_dynamic_ida);
-static int pci_get_new_domain_nr(void)
+static void of_pci_reserve_static_domain_nr(void)
  {
-	return atomic_inc_return(&__domain_nr);
+	struct device_node *np;
+	int domain_nr;
+
+	for_each_node_by_type(np, "pci") {
+		domain_nr = of_get_pci_domain_nr(np);
+		if (domain_nr < 0)
+			continue;
+		/*
+		 * Permanently allocate domain_nr in dynamic_ida
+		 * to prevent it from dynamic allocation.
+		 */
+		ida_alloc_range(&pci_domain_nr_dynamic_ida,
+				domain_nr, domain_nr, GFP_KERNEL);
+	}
  }
static int of_pci_bus_find_domain_nr(struct device *parent)
  {
-	static int use_dt_domains = -1;
-	int domain = -1;
+	static bool static_domains_reserved = false;
+	int domain_nr;
- if (parent)
-		domain = of_get_pci_domain_nr(parent->of_node);
+	/* On the first call scan device tree for static allocations. */
+	if (!static_domains_reserved) {
+		of_pci_reserve_static_domain_nr();
+		static_domains_reserved = true;
+	}
+
+	if (parent) {
+		/*
+		 * If domain is in DT then allocate it in static IDA.
+		 * This prevent duplicate static allocations in case
+		 * of errors in DT.
+		 */
+		domain_nr = of_get_pci_domain_nr(parent->of_node);
+		if (domain_nr >= 0)
+			return ida_alloc_range(&pci_domain_nr_static_ida,
+					       domain_nr, domain_nr,
+					       GFP_KERNEL);
+	}
/*
-	 * Check DT domain and use_dt_domains values.
-	 *
-	 * If DT domain property is valid (domain >= 0) and
-	 * use_dt_domains != 0, the DT assignment is valid since this means
-	 * we have not previously allocated a domain number by using
-	 * pci_get_new_domain_nr(); we should also update use_dt_domains to
-	 * 1, to indicate that we have just assigned a domain number from
-	 * DT.
-	 *
-	 * If DT domain property value is not valid (ie domain < 0), and we
-	 * have not previously assigned a domain number from DT
-	 * (use_dt_domains != 1) we should assign a domain number by
-	 * using the:
-	 *
-	 * pci_get_new_domain_nr()
-	 *
-	 * API and update the use_dt_domains value to keep track of method we
-	 * are using to assign domain numbers (use_dt_domains = 0).
-	 *
-	 * All other combinations imply we have a platform that is trying
-	 * to mix domain numbers obtained from DT and pci_get_new_domain_nr(),
-	 * which is a recipe for domain mishandling and it is prevented by
-	 * invalidating the domain value (domain = -1) and printing a
-	 * corresponding error.
+	 * If domain was not specified in DT then choose free id from dynamic
+	 * allocations. All domain numbers from DT are permanently in dynamic
+	 * allocations to prevent assigning them to other DT nodes without
+	 * static domain.
  	 */
-	if (domain >= 0 && use_dt_domains) {
-		use_dt_domains = 1;
-	} else if (domain < 0 && use_dt_domains != 1) {
-		use_dt_domains = 0;
-		domain = pci_get_new_domain_nr();
-	} else {
-		if (parent)
-			pr_err("Node %pOF has ", parent->of_node);
-		pr_err("Inconsistent \"linux,pci-domain\" property in DT\n");
-		domain = -1;
-	}
+	return ida_alloc(&pci_domain_nr_dynamic_ida, GFP_KERNEL);
+}
- return domain;
+static void of_pci_bus_release_domain_nr(struct pci_bus *bus, struct device *parent)
+{
+	if (bus->domain_nr < 0)
+		return;
+
+	/* Release domain from ida in which was it allocated. */
+	if (of_get_pci_domain_nr(parent->of_node) == bus->domain_nr)
+		ida_free(&pci_domain_nr_static_ida, bus->domain_nr);
+	else
+		ida_free(&pci_domain_nr_dynamic_ida, bus->domain_nr);
  }
int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent)
@@ -6823,6 +6833,13 @@ int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent)
  	return acpi_disabled ? of_pci_bus_find_domain_nr(parent) :
  			       acpi_pci_bus_find_domain_nr(bus);
  }
+
+void pci_bus_release_domain_nr(struct pci_bus *bus, struct device *parent)
+{
+	if (!acpi_disabled)
+		return;
+	of_pci_bus_release_domain_nr(bus, parent);
+}
  #endif
/**
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 17a969942d37..12092d238403 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -906,6 +906,8 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge)
  		bus->domain_nr = pci_bus_find_domain_nr(bus, parent);
  	else
  		bus->domain_nr = bridge->domain_nr;
+	if (bus->domain_nr < 0)
+		goto free;
  #endif
b = pci_find_bus(pci_domain_nr(bus), bridge->busnr);
@@ -1030,6 +1032,9 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge)
  	device_del(&bridge->dev);
free:
+#ifdef CONFIG_PCI_DOMAINS_GENERIC
+	pci_bus_release_domain_nr(bus, parent);
+#endif
  	kfree(bus);
  	return err;
  }
diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index 4c54c75050dc..0145aef1b930 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -160,6 +160,12 @@ void pci_remove_root_bus(struct pci_bus *bus)
  	pci_remove_bus(bus);
  	host_bridge->bus = NULL;
+#ifdef CONFIG_PCI_DOMAINS_GENERIC
+	/* Release domain_nr if it was dynamically allocated */
+	if (host_bridge->domain_nr == PCI_DOMAIN_NR_NOT_SET)
+		pci_bus_release_domain_nr(bus, host_bridge->dev.parent);
+#endif
+
  	/* remove the host bridge */
  	device_del(&host_bridge->dev);
  }


After this change was made we are seeing the following bug
report on a Tegra234 Jetson Orin board ...

[   17.172346] tegra194-pcie 141a0000.pcie: host bridge /pcie@141a0000 ranges:
[   17.172470] tegra194-pcie 141a0000.pcie:      MEM 0x2800000000..0x2b27ffffff -> 0x2800000000
[   17.172519] tegra194-pcie 141a0000.pcie:      MEM 0x2b28000000..0x2b2fffffff -> 0x0040000000
[   17.172548] tegra194-pcie 141a0000.pcie:       IO 0x003a100000..0x003a1fffff -> 0x003a100000
[   17.173449] tegra194-pcie 141a0000.pcie: iATU: unroll T, 8 ob, 2 ib, align 64K, limit 32G
[   18.279048] tegra194-pcie 141a0000.pcie: Phy link never came up
[   19.279285] tegra194-pcie 141a0000.pcie: Phy link never came up
[   19.279599] tegra194-pcie 141a0000.pcie: PCI host bridge to bus 0005:00
[   19.279613] pci_bus 0005:00: root bus resource [bus 00-ff]
[   19.279622] pci_bus 0005:00: root bus resource [mem 0x2800000000-0x2b27ffffff pref]
[   19.279631] pci_bus 0005:00: root bus resource [mem 0x2b28000000-0x2b2fffffff] (bus address [0x40000000-0x47ffffff])
[   19.279639] pci_bus 0005:00: root bus resource [io  0x200000-0x2fffff] (bus address [0x3a100000-0x3a1fffff])
[   19.279687] pci 0005:00:00.0: [10de:229a] type 01 class 0x060400
[   19.279886] pci 0005:00:00.0: PME# supported from D0 D3hot
[   19.283256] pci 0005:00:00.0: PCI bridge to [bus 01-ff]
[   19.283590] pcieport 0005:00:00.0: Adding to iommu group 26
[   19.283991] pcieport 0005:00:00.0: PME: Signaling with IRQ 174
[   19.284429] pcieport 0005:00:00.0: AER: enabled with IRQ 174
[   19.285003] pci_bus 0005:01: busn_res: [bus 01-ff] is released
[   19.285591] pci 0005:00:00.0: Removing from iommu group 26
[   19.285751] pci_bus 0005:00: busn_res: [bus 00-ff] is released
[   19.285870] ==================================================================
[   19.293351] BUG: KFENCE: use-after-free read in pci_bus_release_domain_nr+0x10/0x70

[   19.302817] Use-after-free read at 0x000000007f3b80eb (in kfence-#115):
[   19.309677]  pci_bus_release_domain_nr+0x10/0x70
[   19.309691]  dw_pcie_host_deinit+0x28/0x78
[   19.309702]  tegra_pcie_deinit_controller+0x1c/0x38 [pcie_tegra194]
[   19.309734]  tegra_pcie_dw_probe+0x648/0xb28 [pcie_tegra194]
[   19.309752]  platform_probe+0x90/0xd8
[   19.309764]  really_probe+0xb8/0x298
[   19.309777]  __driver_probe_device+0x78/0xd8
[   19.309788]  driver_probe_device+0x38/0x120
[   19.309799]  __device_attach_driver+0x94/0xe0
[   19.309812]  bus_for_each_drv+0x70/0xc8
[   19.309822]  __device_attach+0xfc/0x188
[   19.309833]  device_initial_probe+0x10/0x18
[   19.309844]  bus_probe_device+0x94/0xa0
[   19.309854]  deferred_probe_work_func+0x80/0xb8
[   19.309864]  process_one_work+0x1e0/0x348
[   19.309882]  worker_thread+0x48/0x410
[   19.309891]  kthread+0xf4/0x110
[   19.309904]  ret_from_fork+0x10/0x20

[   19.311457] kfence-#115: 0x00000000063a155a-0x00000000ba698da8, size=1072, cache=kmalloc-2k

[   19.311469] allocated by task 96 on cpu 10 at 19.279323s:
[   19.311562]  __kmem_cache_alloc_node+0x260/0x278
[   19.311571]  kmalloc_trace+0x24/0x30
[   19.311580]  pci_alloc_bus+0x24/0xa0
[   19.311590]  pci_register_host_bridge+0x48/0x4b8
[   19.311601]  pci_scan_root_bus_bridge+0xc0/0xe8
[   19.311613]  pci_host_probe+0x18/0xc0
[   19.311623]  dw_pcie_host_init+0x2c0/0x568
[   19.311630]  tegra_pcie_dw_probe+0x610/0xb28 [pcie_tegra194]
[   19.311647]  platform_probe+0x90/0xd8
[   19.311653]  really_probe+0xb8/0x298
[   19.311663]  __driver_probe_device+0x78/0xd8
[   19.311672]  driver_probe_device+0x38/0x120
[   19.311682]  __device_attach_driver+0x94/0xe0
[   19.311694]  bus_for_each_drv+0x70/0xc8
[   19.311702]  __device_attach+0xfc/0x188
[   19.311713]  device_initial_probe+0x10/0x18
[   19.311724]  bus_probe_device+0x94/0xa0
[   19.311733]  deferred_probe_work_func+0x80/0xb8
[   19.311743]  process_one_work+0x1e0/0x348
[   19.311753]  worker_thread+0x48/0x410
[   19.311763]  kthread+0xf4/0x110
[   19.311771]  ret_from_fork+0x10/0x20

[   19.311782] freed by task 96 on cpu 10 at 19.285833s:
[   19.311799]  release_pcibus_dev+0x30/0x40
[   19.311808]  device_release+0x30/0x90
[   19.311814]  kobject_put+0xa8/0x120
[   19.311832]  device_unregister+0x20/0x30
[   19.311839]  pci_remove_bus+0x78/0x88
[   19.311850]  pci_remove_root_bus+0x5c/0x98
[   19.311860]  dw_pcie_host_deinit+0x28/0x78
[   19.311866]  tegra_pcie_deinit_controller+0x1c/0x38 [pcie_tegra194]
[   19.311883]  tegra_pcie_dw_probe+0x648/0xb28 [pcie_tegra194]
[   19.311900]  platform_probe+0x90/0xd8
[   19.311906]  really_probe+0xb8/0x298
[   19.311916]  __driver_probe_device+0x78/0xd8
[   19.311926]  driver_probe_device+0x38/0x120
[   19.311936]  __device_attach_driver+0x94/0xe0
[   19.311947]  bus_for_each_drv+0x70/0xc8
[   19.311956]  __device_attach+0xfc/0x188
[   19.311966]  device_initial_probe+0x10/0x18
[   19.311976]  bus_probe_device+0x94/0xa0
[   19.311985]  deferred_probe_work_func+0x80/0xb8
[   19.311995]  process_one_work+0x1e0/0x348
[   19.312005]  worker_thread+0x48/0x410
[   19.312014]  kthread+0xf4/0x110
[   19.312022]  ret_from_fork+0x10/0x20

[   19.313579] CPU: 10 PID: 96 Comm: kworker/u24:2 Not tainted 6.2.0 #4
[   19.320171] Hardware name:  /, BIOS 1.0-d7fb19b 08/10/2022
[   19.325852] Workqueue: events_unbound deferred_probe_work_func
[   19.331919] ==================================================================

After reverting this change I no longer see this issue.
Let me know if you have any thoughts.

Thanks
Jon
--
nvpublic



[Index of Archives]     [DMA Engine]     [Linux Coverity]     [Linux USB]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [Greybus]

  Powered by Linux