This patch adds a function that reassigns page-aligned memory resources to a device. This version incorporates the feedback received. I created this patch for Xen's dom0 Linux, and it has already been included there. It is useful when assigning an I/O device to an HVM domain using PCI passthrough, because PCI passthrough requires page-aligned memory resources. It is also useful for KVM, so I am submitting it to the linux-pci mailing list. There are already similar patches for SR-IOV support: http://markmail.org/message/7frtyaq2rwiwt7fa http://markmail.org/message/7w6ebnryu6iob6nf http://markmail.org/message/trpfntlodhw7wfre My patch is simpler than those, because it needs only one boot parameter. On many systems, the BIOS assigns memory resources to the device and enables it. My patch therefore disables the device and releases its memory resources. It then assigns page-aligned memory resources to the device. To reassign page-aligned memory resources to a device, add a Linux boot parameter as follows. pci=pagealignmem=00:1d.7;01:00.0 pagealignmem=[ssss:]bb:dd.f[; ...] Specifies the devices to which page-aligned memory resources are reassigned. A PCI-PCI bridge can be specified if its resource windows need to be expanded. Thanks, -- Yuji Shimada Signed-off-by: Yuji Shimada <shimada-yxb@xxxxxxxxxxxxxxx> diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index c86c074..fddd619 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1704,6 +1704,10 @@ and is between 256 and 4096 characters. It is defined in the file cbmemsize=nn[KMG] The fixed amount of bus space which is reserved for the CardBus bridge's memory window. The default value is 64 megabytes. + pagealignmem=[ssss:]bb:dd.f[; ...] Specifies device to + reassign page-aligned memory resources. PCI-PCI + bridge can be specified, if resource windows + need to be expanded. pcie_aspm= [PCIE] Forcibly enable or disable PCIe Active State Power Management. 
diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index e1ca425..64ef8ca 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -50,3 +50,8 @@ config HT_IRQ This allows native hypertransport devices to use interrupts. If unsure say Y. + +config PAGEALIGNMEM_PARAM + bool + depends on PCI_QUIRKS + default y diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 21f2ac6..cf5d7c5 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2022,6 +2022,70 @@ int pci_select_bars(struct pci_dev *dev, unsigned long flags) return bars; } +#ifdef CONFIG_PAGEALIGNMEM_PARAM +#define TOKEN_MAX (12) /* "SSSS:BB:DD.F" length is 12 */ +#define PAGEALIGNMEM_PARAM_MAX (2048) +static char param_pagealignmem[PAGEALIGNMEM_PARAM_MAX] = {0}; + +/** + * is_reassigndev - check if specified PCI is target device to reassign + * @dev: the PCI device to check + * + * RETURNS: non-zero for PCI device is a target device to reassign, + * or zero is not. + */ +int is_reassigndev(struct pci_dev *dev) +{ + char dev_str[TOKEN_MAX+1]; + int seg, bus, slot, func; + int len; + char *p, *next_str; + + p = param_pagealignmem; + for (; p; p = next_str + 1) { + next_str = strpbrk(p, ";"); + if (next_str) { + len = next_str - p; + } else { + len = strlen(p); + } + if (len > 0 && len <= TOKEN_MAX) { + strncpy(dev_str, p, len); + *(dev_str + len) = '\0'; + + if (sscanf(dev_str, "%x:%x:%x.%x", + &seg, &bus, &slot, &func) != 4) { + if (sscanf(dev_str, "%x:%x.%x", + &bus, &slot, &func) == 3) { + seg = 0; + } else { + /* failed to scan strings */ + seg = -1; + bus = -1; + } + } + if (seg == pci_domain_nr(dev->bus) && + bus == dev->bus->number && + slot == PCI_SLOT(dev->devfn) && + func == PCI_FUNC(dev->devfn)) { + /* It's a target device */ + return 1; + } + } + if (!next_str) + break; + } + + return 0; +} + +static void __devinit pci_pagealignmem_setup(char *str) +{ + strncpy(param_pagealignmem, str, PAGEALIGNMEM_PARAM_MAX); + param_pagealignmem[PAGEALIGNMEM_PARAM_MAX - 1] = '\0'; +} +#endif /* 
CONFIG_PAGEALIGNMEM_PARAM */ + static void __devinit pci_no_domains(void) { #ifdef CONFIG_PCI_DOMAINS @@ -2059,6 +2123,8 @@ static int __devinit pci_setup(char *str) pci_cardbus_io_size = memparse(str + 9, &str); } else if (!strncmp(str, "cbmemsize=", 10)) { pci_cardbus_mem_size = memparse(str + 10, &str); + } else if (!strncmp(str, "pagealignmem=", 13)) { + pci_pagealignmem_setup(str + 13); } else { printk(KERN_ERR "PCI: Unknown option `%s'\n", str); diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 9de87e9..9d28dba 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -171,4 +171,13 @@ static inline int pci_ari_enabled(struct pci_dev *dev) return dev->ari_enabled; } +#ifdef CONFIG_PCI_QUIRKS +extern int is_reassigndev(struct pci_dev *dev); +extern void pci_disable_bridge_window(struct pci_dev *dev); +#else +#define is_reassigndev(dev) 0 +#endif +extern resource_size_t + pci_resource_alignment(struct pci_dev *dev, struct resource *res); + #endif /* DRIVERS_PCI_H */ diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 5049a47..84c9825 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -32,6 +32,56 @@ int pcie_mch_quirk; EXPORT_SYMBOL(pcie_mch_quirk); #ifdef CONFIG_PCI_QUIRKS +/* + * This quirk function disables the device and releases resources + * which is specified by kernel's boot parameter 'pci=pagealignmem='. + * Later on, kernel will assign page-aligned memory resource back + * to that device. 
+ */ +static void __devinit quirk_release_resources(struct pci_dev *dev) +{ + int i; + struct resource *r; + + if (!is_reassigndev(dev)) + return; + + if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL && + (dev->class >> 8) == PCI_CLASS_BRIDGE_HOST) { + /* PCI Host Bridge isn't a target device */ + printk(KERN_WARNING + "PCI: Can't reassign resources to Host Bridge[%s].\n", + pci_name(dev)); + return; + } + + printk(KERN_INFO "PCI: Disable device and release resources [%s].\n", + pci_name(dev)); + pci_disable_device(dev); + + for (i=0; i < PCI_NUM_RESOURCES; i++) { + r = &dev->resource[i]; + if (!(r->flags & IORESOURCE_MEM)) + continue; + + r->end = r->end - r->start; + r->start = 0; + + if (i < PCI_BRIDGE_RESOURCES) { + pci_update_resource(dev, r, i); + } + } + /* need to disable bridge's resource window, + * to make kernel enable to reassign new resource + * window later on. + */ + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE && + (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) { + pci_disable_bridge_window(dev); + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, quirk_release_resources); + /* The Mellanox Tavor device gives false positive parity errors * Mark this device with a broken_parity_status, to allow * PCI scanning code to "skip" this now blacklisted device. 
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index ea979f2..767fc17 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -25,6 +25,7 @@ #include <linux/ioport.h> #include <linux/cache.h> #include <linux/slab.h> +#include "pci.h" static void pbus_assign_resources_sorted(struct pci_bus *bus) @@ -343,6 +344,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long list_for_each_entry(dev, &bus->devices, bus_list) { int i; + int reassign = is_reassigndev(dev); for (i = 0; i < PCI_NUM_RESOURCES; i++) { struct resource *r = &dev->resource[i]; @@ -351,8 +353,10 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long if (r->parent || (r->flags & mask) != type) continue; r_size = resource_size(r); + if (reassign) + r_size = ALIGN(r_size, PAGE_SIZE); /* For bridges size != alignment */ - align = resource_alignment(r); + align = pci_resource_alignment(dev, r); order = __ffs(align) - 20; if (order > 11) { dev_warn(&dev->dev, "BAR %d bad alignment %llx: " diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 2dbd96c..011d915 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -26,6 +26,26 @@ #include "pci.h" +/** + * pci_resource_alignment - calculate reassign resource's alignment + * @dev: PCI device to assign resource + * @res: Resource to align + * + * Returns alignment on success, 0 (invalid alignment) on failure. 
+ */ +resource_size_t +pci_resource_alignment(struct pci_dev *dev, struct resource *res) +{ + resource_size_t align = resource_alignment(res); + + if (align) + if (is_reassigndev(dev) && + (res->flags & IORESOURCE_MEM)) + align = ALIGN(align, PAGE_SIZE); + + return align; +} + void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno) { struct pci_bus_region region; @@ -122,6 +142,21 @@ int pci_claim_resource(struct pci_dev *dev, int resource) return err; } +#ifdef CONFIG_PCI_QUIRKS +void pci_disable_bridge_window(struct pci_dev *dev) +{ + dev_dbg(&dev->dev, "Disable bridge window\n"); + + /* MMIO Base/Limit */ + pci_write_config_dword(dev, PCI_MEMORY_BASE, 0x0000fff0); + + /* Prefetchable MMIO Base/Limit */ + pci_write_config_dword(dev, PCI_PREF_LIMIT_UPPER32, 0); + pci_write_config_dword(dev, PCI_PREF_MEMORY_BASE, 0x0000fff0); + pci_write_config_dword(dev, PCI_PREF_BASE_UPPER32, 0xffffffff); +} +#endif /* CONFIG_PCI_QUIRKS */ + int pci_assign_resource(struct pci_dev *dev, int resno) { struct pci_bus *bus = dev->bus; @@ -132,7 +167,7 @@ int pci_assign_resource(struct pci_dev *dev, int resno) size = resource_size(res); min = (res->flags & IORESOURCE_IO) ? 
PCIBIOS_MIN_IO : PCIBIOS_MIN_MEM; - align = resource_alignment(res); + align = pci_resource_alignment(dev, res); if (!align) { dev_err(&dev->dev, "BAR %d: can't allocate resource (bogus " "alignment) %pR flags %#lx\n", @@ -162,6 +197,11 @@ int pci_assign_resource(struct pci_dev *dev, int resno) } else { res->flags &= ~IORESOURCE_STARTALIGN; if (resno < PCI_BRIDGE_RESOURCES) + if (is_reassigndev(dev)) + dev_dbg(&dev->dev, "BAR %d: assign resource " + "[%#llx - %#llx]\n", resno, + (unsigned long long)res->start, + (unsigned long long)res->end); pci_update_resource(dev, res, resno); } @@ -223,7 +263,7 @@ void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head) if (!(r->flags) || r->parent) continue; - r_align = resource_alignment(r); + r_align = pci_resource_alignment(dev, r); if (!r_align) { dev_warn(&dev->dev, "BAR %d: bogus alignment " "%pR flags %#lx\n", @@ -235,7 +275,8 @@ void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head) struct resource_list *ln = list->next; if (ln) - align = resource_alignment(ln->res); + align = pci_resource_alignment(ln->dev, + ln->res); if (r_align > align) { tmp = kmalloc(sizeof(*tmp), GFP_KERNEL); -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html