On Wed, Jan 18, 2012 at 9:46 AM, Bjorn Helgaas <bhelgaas@xxxxxxxxxx> wrote: > On Wed, Jan 18, 2012 at 10:27 AM, Yinghai Lu <yinghai@xxxxxxxxxx> wrote: >> On Wed, Jan 18, 2012 at 8:52 AM, Bjorn Helgaas <bhelgaas@xxxxxxxxxx> wrote: >>> If we make a quirk for this machine, we still have the question of >>> what to do with my patches. I assert that if Linux ever reconfigures >>> any bus numbers or does any configuration of hot-added P2P bridges, it >>> must pay attention to the host bridge bus number window. Therefore, I >>> think we need something like this series even if we make a quirk. >> >> We may need more smart way to find unused bus range instead of just >> max+1 and ++max. >> >> For example: one bridge (A) have two child bridges (B and C), >> A: bus range: 10-2f >> B: bus range: 11-1f >> C: bus range: 20-2f >> >> when some broken case happen, B bus BIOS assigned bus range will be >> all cleared in first pass. >> but C bus is ok. but in second bus, bus will be assigned to 30- . >> that is totally wrong, We should still >> try to use bus 11-1f at first for bus B. > > Yes, I agree we may need a better way to choose bus numbers in the > first place. My current patch (3/4) doesn't change how we choose > them; it only rejects invalid ones. I came up with a draft that tracks bus numbers and allocates a bus range instead. Please check the attached patch. Thanks Yinghai
Subject: [RFC PATCH] PCI: allocate bus range instead of use max blindly Every bus will have an extra busn_res, and these are linked together under iobusn_resource. Later, when we need to find a usable bus number range, we try to allocate it from the resource tree. Only tested on x86 64-bit. Still to do: 1. add /proc/iobusn? 2. expand the bus range when we cannot find enough range in the parent range. 3. use a list instead to track transparent buses? Signed-off-by: Yinghai Lu <yinghai@xxxxxxxxxx> diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index b9676ae..15cd4b7 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -172,7 +172,7 @@ static inline void arch_fix_phys_package_id(int num, u32 slot) } struct pci_bus; -void x86_pci_root_bus_resources(int bus, struct list_head *resources); +void x86_pci_root_bus_resources(int bus, int *bus_max, struct list_head *resources); #ifdef CONFIG_SMP #define mc_capable() ((boot_cpu_data.x86_max_cores > 1) && \ diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index a312e76..b48f0d9 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -348,6 +348,7 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root) struct acpi_device *device = root->device; int domain = root->segment; int busnum = root->secondary.start; + int bus_max = root->secondary.end; LIST_HEAD(resources); struct pci_bus *bus; struct pci_sysdata *sd; @@ -405,12 +406,20 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root) } else { get_current_resources(device, busnum, domain, &resources); if (list_empty(&resources)) - x86_pci_root_bus_resources(busnum, &resources); + x86_pci_root_bus_resources(busnum, &bus_max, &resources); bus = pci_create_root_bus(NULL, busnum, &pci_root_ops, sd, &resources); - if (bus) + if (bus) { + /* init busn_res */ + bus->busn_res.start = (domain << 8) | busnum; + bus->busn_res.end = (domain << 8) | bus_max; + bus->busn_res.flags = IORESOURCE_BUS; + + 
insert_resource(&iobusn_resource, &bus->busn_res); + dev_printk(KERN_DEBUG, &bus->dev, "busn_res: %06llx-%06llx inserted\n", (unsigned long long)bus->busn_res.start, (unsigned long long)bus->busn_res.end); + bus->subordinate = pci_scan_child_bus(bus); - else + } else pci_free_resource_list(&resources); } diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c index fd3f655..3256fb9 100644 --- a/arch/x86/pci/bus_numa.c +++ b/arch/x86/pci/bus_numa.c @@ -7,7 +7,7 @@ int pci_root_num; struct pci_root_info pci_root_info[PCI_ROOT_NR]; -void x86_pci_root_bus_resources(int bus, struct list_head *resources) +void x86_pci_root_bus_resources(int bus, int *bus_max, struct list_head *resources) { int i; int j; @@ -28,6 +28,7 @@ void x86_pci_root_bus_resources(int bus, struct list_head *resources) bus); info = &pci_root_info[i]; + *bus_max = info->bus_max; for (j = 0; j < info->res_num; j++) { struct resource *res; struct resource *root; @@ -51,6 +52,10 @@ default_resources: printk(KERN_DEBUG "PCI: root bus %02x: using default resources\n", bus); pci_add_resource(resources, &ioport_resource); pci_add_resource(resources, &iomem_resource); + if (!bus) + *bus_max = 0xff; + else if (!*bus_max) + *bus_max = bus; } void __devinit update_res(struct pci_root_info *info, resource_size_t start, diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 323481e..e673355 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -433,6 +433,7 @@ struct pci_bus * __devinit pcibios_scan_root(int busnum) LIST_HEAD(resources); struct pci_bus *bus = NULL; struct pci_sysdata *sd; + int bus_max; while ((bus = pci_find_next_bus(bus)) != NULL) { if (bus->number == busnum) { @@ -454,8 +455,8 @@ struct pci_bus * __devinit pcibios_scan_root(int busnum) sd->node = get_mp_bus_to_node(busnum); printk(KERN_DEBUG "PCI: Probing PCI hardware (bus %02x)\n", busnum); - x86_pci_root_bus_resources(busnum, &resources); - bus = pci_scan_root_bus(NULL, busnum, &pci_root_ops, sd, &resources); + 
x86_pci_root_bus_resources(busnum, &bus_max, &resources); + bus = pci_scan_root_bus(NULL, busnum, bus_max, &pci_root_ops, sd, &resources); if (!bus) { pci_free_resource_list(&resources); kfree(sd); @@ -643,6 +644,7 @@ struct pci_bus * __devinit pci_scan_bus_on_node(int busno, struct pci_ops *ops, LIST_HEAD(resources); struct pci_bus *bus = NULL; struct pci_sysdata *sd; + int bus_max; /* * Allocate per-root-bus (not per bus) arch-specific data. @@ -655,8 +657,8 @@ struct pci_bus * __devinit pci_scan_bus_on_node(int busno, struct pci_ops *ops, return NULL; } sd->node = node; - x86_pci_root_bus_resources(busno, &resources); - bus = pci_scan_root_bus(NULL, busno, ops, sd, &resources); + x86_pci_root_bus_resources(busno, &bus_max, &resources); + bus = pci_scan_root_bus(NULL, busno, bus_max, ops, sd, &resources); if (!bus) { pci_free_resource_list(&resources); kfree(sd); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 05c5f76..dc5413d 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -693,6 +693,12 @@ int __devinit pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, child->primary = primary; child->subordinate = subordinate; child->bridge_ctl = bctl; + + child->busn_res.start = (pci_domain_nr(bus) << 8) | secondary; + child->busn_res.end = (pci_domain_nr(bus) << 8) | subordinate; + child->busn_res.flags = IORESOURCE_BUS | IORESOURCE_BUSY; + dev_printk(KERN_DEBUG, &child->dev, "busn_res: %06llx-%06llx inserted\n", (unsigned long long)child->busn_res.start, (unsigned long long)child->busn_res.end); + insert_resource(&bus->busn_res, &child->busn_res); } cmax = pci_scan_child_bus(child); @@ -705,6 +711,11 @@ int __devinit pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, * We need to assign a number to this bus which we always * do in the second pass. 
*/ + resource_size_t n_size; + struct resource busn_res; + int ret = -ENOMEM; + int old_max = max; + if (!pass) { if (pcibios_assign_all_busses() || broken) /* Temporarily disable forwarding of the @@ -721,20 +732,40 @@ int __devinit pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, /* Clear errors */ pci_write_config_word(dev, PCI_STATUS, 0xffff); - /* Prevent assigning a bus number that already exists. - * This can happen when a bridge is hot-plugged, so in - * this case we only re-scan this bus. */ - child = pci_find_bus(pci_domain_nr(bus), max+1); - if (!child) { - child = pci_add_new_bus(bus, dev, ++max); - if (!child) - goto out; + /* find bigest range in bus->busn_res that we can use */ + n_size = resource_size(&bus->busn_res) - 1; + memset(&busn_res, 0, sizeof(struct resource)); + dev_printk(KERN_DEBUG, &dev->dev, "try to find busn in busn_res: %06llx-%06llx \n", (unsigned long long)bus->busn_res.start, (unsigned long long)bus->busn_res.end); + while (n_size > 0) { + ret = allocate_resource(&bus->busn_res, &busn_res, n_size, bus->busn_res.start + 1, bus->busn_res.end, 1, NULL, NULL); + if (ret == 0) + break; + n_size--; } + + if (ret != 0) + goto out; + + child = pci_add_new_bus(bus, dev, busn_res.start & 0xff); + if (!child) + goto out; + + child->subordinate = busn_res.end & 0xff; + + release_resource(&busn_res); + child->busn_res.start = busn_res.start; + child->busn_res.end = busn_res.end; + child->busn_res.flags = IORESOURCE_BUS | IORESOURCE_BUSY; + dev_printk(KERN_DEBUG, &child->dev, "busn_res: %06llx-%06llx inserted\n", child->busn_res.start, child->busn_res.end); + insert_resource(&bus->busn_res, &child->busn_res); + buses = (buses & 0xff000000) | ((unsigned int)(child->primary) << 0) | ((unsigned int)(child->secondary) << 8) | ((unsigned int)(child->subordinate) << 16); + max = child->secondary; + /* * yenta.c forces a secondary latency timer of 176. * Copy that behaviour here. 
@@ -802,6 +833,12 @@ int __devinit pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, */ child->subordinate = max; pci_write_config_byte(dev, PCI_SUBORDINATE_BUS, max); + child->busn_res.end &= ~0xff; + child->busn_res.end |= max; + dev_printk(KERN_DEBUG, &child->dev, "busn_res: %06llx-%06llx adjusted\n", (unsigned long long)child->busn_res.start, (unsigned long long)child->busn_res.end); + + if (old_max > max) + max = old_max; } sprintf(child->name, @@ -1641,7 +1678,7 @@ err_out: return NULL; } -struct pci_bus * __devinit pci_scan_root_bus(struct device *parent, int bus, +struct pci_bus * __devinit pci_scan_root_bus(struct device *parent, int bus, int bus_max, struct pci_ops *ops, void *sysdata, struct list_head *resources) { struct pci_bus *b; @@ -1650,6 +1687,13 @@ struct pci_bus * __devinit pci_scan_root_bus(struct device *parent, int bus, if (!b) return NULL; + /* init busn_res */ + b->busn_res.start = (pci_domain_nr(b) << 8) | bus; + b->busn_res.end = (pci_domain_nr(b) << 8) | bus_max; + b->busn_res.flags = IORESOURCE_BUS; + + insert_resource(&iobusn_resource, &b->busn_res); + b->subordinate = pci_scan_child_bus(b); pci_bus_add_devices(b); return b; diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c index ef3c18a..4ca0f74 100644 --- a/drivers/pci/remove.c +++ b/drivers/pci/remove.c @@ -68,6 +68,7 @@ void pci_remove_bus(struct pci_bus *pci_bus) down_write(&pci_bus_sem); list_del(&pci_bus->node); + release_resource(&pci_bus->busn_res); up_write(&pci_bus_sem); if (!pci_bus->is_added) return; diff --git a/include/linux/ioport.h b/include/linux/ioport.h index e885ba2..6fe9e19 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -136,6 +136,7 @@ struct resource { /* PC/ISA/whatever - the normal PC address spaces: IO and memory */ extern struct resource ioport_resource; extern struct resource iomem_resource; +extern struct resource iobusn_resource; extern struct resource *request_resource_conflict(struct resource *root, struct 
resource *new); extern int request_resource(struct resource *root, struct resource *new); diff --git a/include/linux/pci.h b/include/linux/pci.h index d266085..edaac82 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -420,6 +420,7 @@ struct pci_bus { struct list_head slots; /* list of slots on this bus */ struct resource *resource[PCI_BRIDGE_RESOURCE_NUM]; struct list_head resources; /* address space routed to this bus */ + struct resource busn_res; /* track registered bus num range */ struct pci_ops *ops; /* configuration access functions */ void *sysdata; /* hook for sys-specific extension */ @@ -666,7 +667,8 @@ struct pci_bus *pci_create_root_bus(struct device *parent, int bus, struct pci_ops *ops, void *sysdata, struct list_head *resources); struct pci_bus * __devinit pci_scan_root_bus(struct device *parent, int bus, - struct pci_ops *ops, void *sysdata, + int bus_max, struct pci_ops *ops, + void *sysdata, struct list_head *resources); struct pci_bus *pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev, int busnr); diff --git a/kernel/resource.c b/kernel/resource.c index 7640b3a..53b42f0 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -38,6 +38,14 @@ struct resource iomem_resource = { }; EXPORT_SYMBOL(iomem_resource); +struct resource iobusn_resource = { + .name = "PCI busn", + .start = 0, + .end = 0xffffff, + .flags = IORESOURCE_BUS, +}; +EXPORT_SYMBOL(iobusn_resource); + /* constraints to be met while allocating resources */ struct resource_constraint { resource_size_t min, max, align;