This patch assigns memory resources with a specified alignment at boot-time or run-time. The patch is useful when we use PCI pass-through, because page-aligned memory resources are required for PCI pass-through. The patch can be used for general purposes. This is a revised version of the patch based on the review comments. If you want to assign the resource at boot time, please set the "pci=resource_alignment=" boot parameter. This is the format of the "pci=resource_alignment=" boot parameter: [<order of align>@][<domain>:]<bus>:<slot>.<func>[; ...] Specifies alignment and device to reassign aligned memory resources. If <order of align> is not specified, PAGE_SIZE is used as the alignment. A PCI-PCI bridge can be specified if its resource windows need to be expanded. This is an example: pci=resource_alignment=20@07:00.0;00:1d.7 In this example, device 07:00.0 is aligned to 2^20 bytes (1 MiB) and device 00:1d.7 is aligned to PAGE_SIZE. If you want to assign the resource at run-time, please write the setting to the "/sys/bus/pci/resource_alignment" file, then hot-remove and hot-add the device. For this purpose, fakephp can be used. The format of the file is the same as that of the boot parameter. You can use "," instead of ";". For example: # /sbin/modprobe fakephp # cd /sys/bus/pci # echo -n 20@0d:00.0 > resource_alignment # cat slots/fake13/address 0000:0d:00 # echo -n 0 > slots/fake13/power # echo -n 1 > slots/fake1/power Thanks, -- Yuji Shimada Signed-off-by: Yuji Shimada <shimada-yxb@xxxxxxxxxxxxxxx> diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index b182626..290e701 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1756,6 +1756,15 @@ and is between 256 and 4096 characters. It is defined in the file cbmemsize=nn[KMG] The fixed amount of bus space which is reserved for the CardBus bridge's memory window. The default value is 64 megabytes. + resource_alignment= + Format: + [<order of align>@][<domain>:]<bus>:<slot>.<func>[; ...] + Specifies alignment and device to reassign + aligned memory resources. 
+ If <order of align> is not specified, + PAGE_SIZE is used as alignment. + PCI-PCI bridge can be specified, if resource + windows need to be expanded. pcie_aspm= [PCIE] Forcibly enable or disable PCIe Active State Power Management. diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 6d61200..e270874 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -20,6 +20,8 @@ #include <linux/pm_wakeup.h> #include <linux/interrupt.h> #include <asm/dma.h> /* isa_dma_bridge_buggy */ +#include <linux/device.h> +#include <asm/setup.h> #include "pci.h" unsigned int pci_pm_d3_delay = PCI_PM_D3_WAIT; @@ -2358,6 +2360,129 @@ int pci_resource_bar(struct pci_dev *dev, int resno, enum pci_bar_type *type) return 0; } +#define RESOURCE_ALIGNMENT_PARAM_SIZE COMMAND_LINE_SIZE +static char resource_alignment_param[RESOURCE_ALIGNMENT_PARAM_SIZE] = {0}; +spinlock_t resource_alignment_lock = SPIN_LOCK_UNLOCKED; + +/** + * pci_specified_resource_alignment - get resource alignment specified by user. + * @dev: the PCI device to get the alignment for + * + * RETURNS: Resource alignment if it is specified. + * Zero if it is not specified. 
+ */ +resource_size_t pci_specified_resource_alignment(struct pci_dev *dev) +{ + int seg, bus, slot, func, align_order, count; + resource_size_t align = 0; + char *p; + unsigned long flags; + + spin_lock_irqsave(&resource_alignment_lock, flags); + p = resource_alignment_param; + while (*p) { + count = 0; + if (sscanf(p, "%d%n", &align_order, &count) == 1 && + p[count] == '@') { + p += count + 1; + } else { + align_order = -1; + } + if (sscanf(p, "%x:%x:%x.%x%n", + &seg, &bus, &slot, &func, &count) != 4) { + seg = 0; + if (sscanf(p, "%x:%x.%x%n", + &bus, &slot, &func, &count) != 3) { + /* Invalid format */ + printk(KERN_ERR "PCI: Can't parse resource_alignment parameter: %s\n", + p); + break; + } + } + p += count; + if (seg == pci_domain_nr(dev->bus) && + bus == dev->bus->number && + slot == PCI_SLOT(dev->devfn) && + func == PCI_FUNC(dev->devfn)) { + if (align_order == -1) { + align = PAGE_SIZE; + } else { + align = 1 << align_order; + } + /* Found */ + break; + } + if (*p != ';' && *p != ',') { + /* End of param or invalid format */ + break; + } + p++; + } + spin_unlock_irqrestore(&resource_alignment_lock, flags); + return align; +} + +/** + * pci_is_reassigndev - check if specified PCI device is a target to reassign + * @dev: the PCI device to check + * + * RETURNS: non-zero if the PCI device is a target device to reassign, + * or zero if it is not. 
+ */ +int pci_is_reassigndev(struct pci_dev *dev) +{ + return (pci_specified_resource_alignment(dev) != 0); +} + +ssize_t pci_set_resource_alignment_param(const char *buf, size_t count) +{ + unsigned long flags; + if (count > RESOURCE_ALIGNMENT_PARAM_SIZE - 1) + count = RESOURCE_ALIGNMENT_PARAM_SIZE - 1; + spin_lock_irqsave(&resource_alignment_lock, flags); + strncpy(resource_alignment_param, buf, count); + resource_alignment_param[count] = '\0'; + spin_unlock_irqrestore(&resource_alignment_lock, flags); + return count; +} + +ssize_t pci_get_resource_alignment_param(char *buf, size_t size) +{ + unsigned long flags; + size_t count; + spin_lock_irqsave(&resource_alignment_lock, flags); + count = snprintf(buf, size, "%s", resource_alignment_param); + spin_unlock_irqrestore(&resource_alignment_lock, flags); + return count; +} + +static void __devinit pci_resource_alignment_setup(char *str) +{ + pci_set_resource_alignment_param(str, strlen(str)); +} + +static ssize_t pci_resource_alignment_show(struct bus_type *bus, char *buf) +{ + return pci_get_resource_alignment_param(buf, PAGE_SIZE); +} + +static ssize_t pci_resource_alignment_store(struct bus_type *bus, + const char *buf, size_t count) +{ + return pci_set_resource_alignment_param(buf, count); +} + +BUS_ATTR(resource_alignment, 0644, pci_resource_alignment_show, + pci_resource_alignment_store); + +static int __init pci_resource_alignment_sysfs_init(void) +{ + return bus_create_file(&pci_bus_type, + &bus_attr_resource_alignment); +} + +late_initcall(pci_resource_alignment_sysfs_init); + static void __devinit pci_no_domains(void) { #ifdef CONFIG_PCI_DOMAINS @@ -2406,6 +2531,8 @@ static int __init pci_setup(char *str) pci_cardbus_io_size = memparse(str + 9, &str); } else if (!strncmp(str, "cbmemsize=", 10)) { pci_cardbus_mem_size = memparse(str + 10, &str); + } else if (!strncmp(str, "resource_alignment=", 19)) { + pci_resource_alignment_setup(str + 19); } else { printk(KERN_ERR "PCI: Unknown option `%s'\n", str); diff 
--git a/drivers/pci/pci.h b/drivers/pci/pci.h index 07c0aa5..2cd1cba 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -195,4 +195,10 @@ static inline int pci_ari_enabled(struct pci_bus *bus) return bus->self && bus->self->ari_enabled; } +#ifdef CONFIG_PCI_QUIRKS +extern int pci_is_reassigndev(struct pci_dev *dev); +resource_size_t pci_specified_resource_alignment(struct pci_dev *dev); +extern void pci_disable_bridge_window(struct pci_dev *dev); +#endif + #endif /* DRIVERS_PCI_H */ diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index f20d553..b17f1aa 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -33,6 +33,63 @@ int pcie_mch_quirk; EXPORT_SYMBOL(pcie_mch_quirk); #ifdef CONFIG_PCI_QUIRKS +/* + * This quirk function disables the device and releases resources + * which is specified by kernel's boot parameter 'pci=resource_alignment='. + * It also rounds up size to specified alignment. + * Later on, the kernel will assign page-aligned memory resource back + * to that device. 
+ */ +static void __devinit quirk_resource_alignment(struct pci_dev *dev) +{ + int i; + struct resource *r; + resource_size_t align, size; + + if (!pci_is_reassigndev(dev)) + return; + + if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL && + (dev->class >> 8) == PCI_CLASS_BRIDGE_HOST) { + /* PCI host bridge isn't a target device */ + dev_warn(&dev->dev, + "Can't reassign resources to host bridge.\n"); + return; + } + + dev_info(&dev->dev, "Disabling device and release resources.\n"); + pci_disable_device(dev); + + align = pci_specified_resource_alignment(dev); + for (i=0; i < PCI_NUM_RESOURCES; i++) { + r = &dev->resource[i]; + if (!(r->flags & IORESOURCE_MEM)) + continue; + size = r->end - r->start + 1; + if (size < align) { + size = align; + dev_info(&dev->dev, + "Rounding up size of resource #%d to %#llx.\n", + i, (unsigned long long)size); + } + r->end = size - 1; + r->start = 0; + + if (i < PCI_BRIDGE_RESOURCES) { + pci_update_resource(dev, i); + } + } + /* need to disable bridge's resource window, + * to enable the kernel to reassign new resource + * window later on. + */ + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE && + (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) { + pci_disable_bridge_window(dev); + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, quirk_resource_alignment); + /* The Mellanox Tavor device gives false positive parity errors * Mark this device with a broken_parity_status, to allow * PCI scanning code to "skip" this now blacklisted device. 
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 32e8d88..b7a63c0 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -120,6 +120,21 @@ int pci_claim_resource(struct pci_dev *dev, int resource) return err; } +#ifdef CONFIG_PCI_QUIRKS +void pci_disable_bridge_window(struct pci_dev *dev) +{ + dev_dbg(&dev->dev, "Disabling bridge window.\n"); + + /* MMIO Base/Limit */ + pci_write_config_dword(dev, PCI_MEMORY_BASE, 0x0000fff0); + + /* Prefetchable MMIO Base/Limit */ + pci_write_config_dword(dev, PCI_PREF_LIMIT_UPPER32, 0); + pci_write_config_dword(dev, PCI_PREF_MEMORY_BASE, 0x0000fff0); + pci_write_config_dword(dev, PCI_PREF_BASE_UPPER32, 0xffffffff); +} +#endif /* CONFIG_PCI_QUIRKS */ + int pci_assign_resource(struct pci_dev *dev, int resno) { struct pci_bus *bus = dev->bus; -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html