As I have not received a reply to my mail of November 6, 2008, I am resending it herewith. This patch adds to Linux a function that reassigns page-aligned memory resources to devices. I created this patch for Xen's dom0 Linux, where it has already been included. It is useful when assigning an I/O device to an HVM domain using PCI passthrough, because PCI passthrough requires page-aligned memory resources. It is also useful for KVM, so I am submitting it to the linux-pci mailing list. Actually, there are similar patches for SR-IOV support. http://markmail.org/message/7frtyaq2rwiwt7fa http://markmail.org/message/7w6ebnryu6iob6nf http://markmail.org/message/trpfntlodhw7wfre My patch is simpler than those, because it needs only one boot parameter. On many systems, the BIOS assigns memory resources to the device and enables it. So my patch disables the device and releases its memory resources. Then it assigns page-aligned memory resources to the device. To reassign page-aligned memory resources to a device, please add a Linux boot parameter as follows. reassigndev=00:1d.7,01:00.0 reassigndev= Specifies the devices to which page-aligned memory resources are reassigned. A PCI-PCI bridge can be specified if its resource windows need to be expanded. You can easily change the way devices are specified by modifying the code in reassigndev.c. Thanks, -- Yuji Shimada Signed-off-by: Yuji Shimada <shimada-yxb@xxxxxxxxxxxxxxx> diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index e1ca425..e85896e 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -32,6 +32,20 @@ config PCI_LEGACY option serves to include/exclude only a few drivers that are still using this API. +config PCI_REASSIGN + bool "Enable reassign page-aligned memory resources to device" + depends on PCI + default y + help + Say Y here if you want to reassign page-aligned memory resources to + the device. And add boot parameter of linux as follows. 
+ + reassigndev=00:1d.7,01:00.0 + + "reassigndev=" specifies devices to reassign page-aligned memory + resources. PCI-PCI bridge can be specified, if resource windows need + to be expanded. + config PCI_DEBUG bool "PCI Debugging" depends on PCI && DEBUG_KERNEL diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index af3bfe2..50c6b35 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile @@ -6,6 +6,7 @@ obj-y += access.o bus.o probe.o remove.o pci.o quirks.o slot.o \ pci-driver.o search.o pci-sysfs.o rom.o setup-res.o \ irq.o obj-$(CONFIG_PROC_FS) += proc.o +obj-$(CONFIG_PCI_REASSIGN) += reassigndev.o # Build PCI Express stuff if needed obj-$(CONFIG_PCIEPORTBUS) += pcie/ diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 9de87e9..0b95700 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -171,4 +171,13 @@ static inline int pci_ari_enabled(struct pci_dev *dev) return dev->ari_enabled; } +#ifdef CONFIG_PCI_REASSIGN +extern int is_reassigndev(struct pci_dev *dev); +extern void pci_disable_bridge_window(struct pci_dev *dev); +#else +#define is_reassigndev(dev) 0 +#endif +extern resource_size_t + pci_resource_alignment(struct pci_dev *dev, struct resource *res); + #endif /* DRIVERS_PCI_H */ diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index bbf66ea..e3607f8 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -31,6 +31,54 @@ EXPORT_SYMBOL(pci_pci_problems); int pcie_mch_quirk; EXPORT_SYMBOL(pcie_mch_quirk); +#ifdef CONFIG_PCI_REASSIGN +/* + * This quirk function disables the device and releases resources + * which is specified by kernel's boot parameter 'reassigndev'. + * Later on, kernel will assign page-aligned memory resource back + * to that device. 
+ */ +static void __devinit quirk_release_resources(struct pci_dev *dev) +{ + int i; + struct resource *r; + /* Act only on devices selected by the 'reassigndev=' boot parameter */ + if (is_reassigndev(dev)) { + if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL && + (dev->class >> 8) == PCI_CLASS_BRIDGE_HOST) { + /* PCI Host Bridge isn't a target device */ + return; + } + printk(KERN_INFO + "PCI: Disable device and release resources [%s].\n", + pci_name(dev)); + pci_disable_device(dev); + /* Walk every resource slot; non-memory resources are left untouched */ + for (i=0; i < PCI_NUM_RESOURCES; i++) { + r = &dev->resource[i]; + if (!(r->flags & IORESOURCE_MEM)) + continue; + /* Keep the size but drop the address: the range becomes [0, size - 1] */ + r->end = r->end - r->start; + r->start = 0; + /* Only resources below PCI_BRIDGE_RESOURCES are handed to pci_update_resource() */ + if (i < PCI_BRIDGE_RESOURCES) { + pci_update_resource(dev, r, i); + } + } + /* For a PCI-PCI bridge, also disable the bridge's resource + * window so that the kernel is able to assign a new resource + * window later on. + */ + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE && + (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) { + pci_disable_bridge_window(dev); + } + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, quirk_release_resources); +#endif /* CONFIG_PCI_REASSIGN */ + #ifdef CONFIG_PCI_QUIRKS /* The Mellanox Tavor device gives false positive parity errors * Mark this device with a broken_parity_status, to allow diff --git a/drivers/pci/reassigndev.c b/drivers/pci/reassigndev.c new file mode 100644 index 0000000..ea19481 --- /dev/null +++ b/drivers/pci/reassigndev.c @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2008, NEC Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + */ + +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/string.h> +#include "pci.h" + + +#define REASSIGNDEV_PARAM_MAX (2048) +#define TOKEN_MAX (12) /* "SSSS:BB:DD.F" length is 12 */ + +static char param_reassigndev[REASSIGNDEV_PARAM_MAX] = {0}; + /* Handler for the "reassigndev=" boot parameter: stores the raw value, truncated to REASSIGNDEV_PARAM_MAX - 1 characters and always NUL-terminated, in param_reassigndev; returns 1. */ +static int __init reassigndev_setup(char *str) +{ + strncpy(param_reassigndev, str, REASSIGNDEV_PARAM_MAX); + param_reassigndev[REASSIGNDEV_PARAM_MAX - 1] = '\0'; + return 1; +} +__setup("reassigndev=", reassigndev_setup); + /* Returns 1 if dev's segment, bus, slot and function match any comma-separated token of the stored parameter, 0 otherwise. Tokens are hexadecimal "SSSS:BB:DD.F" or "BB:DD.F" (segment taken as 0); empty tokens and tokens longer than TOKEN_MAX are skipped. */ +int is_reassigndev(struct pci_dev *dev) +{ + char dev_str[TOKEN_MAX+1]; + int seg, bus, slot, func; + int len; + char *p, *next_str; + + p = param_reassigndev; + for (; p; p = next_str + 1) { + next_str = strpbrk(p, ","); + if (next_str) { + len = next_str - p; + } else { + len = strlen(p); + } + if (len > 0 && len <= TOKEN_MAX) { + strncpy(dev_str, p, len); + *(dev_str + len) = '\0'; + /* Try the four-field form first, then fall back to the three-field form */ + if (sscanf(dev_str, "%x:%x:%x.%x", + &seg, &bus, &slot, &func) != 4) { + if (sscanf(dev_str, "%x:%x.%x", + &bus, &slot, &func) == 3) { + seg = 0; + } else { + /* failed to scan the string: set seg/bus to -1, presumably so the comparison below cannot match */ + seg = -1; + bus = -1; + } + } + if (seg == pci_domain_nr(dev->bus) && + bus == dev->bus->number && + slot == PCI_SLOT(dev->devfn) && + func == PCI_FUNC(dev->devfn)) { + /* It's a target device */ + return 1; + } + } + if (!next_str) + break; + } + + return 0; +} diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index ea979f2..767fc17 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -25,6 +25,7 @@ #include <linux/ioport.h> #include <linux/cache.h> #include <linux/slab.h> +#include "pci.h" static void pbus_assign_resources_sorted(struct pci_bus *bus) @@ -343,6 +344,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long 
list_for_each_entry(dev, &bus->devices, bus_list) { int i; + int reassign = is_reassigndev(dev); for (i = 0; i < PCI_NUM_RESOURCES; i++) { struct resource *r = &dev->resource[i]; @@ -351,8 +353,10 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long if (r->parent || (r->flags & mask) != type) continue; r_size = resource_size(r); + if (reassign) + r_size = ALIGN(r_size, PAGE_SIZE); /* For bridges size != alignment */ - align = resource_alignment(r); + align = pci_resource_alignment(dev, r); order = __ffs(align) - 20; if (order > 11) { dev_warn(&dev->dev, "BAR %d bad alignment %llx: " diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 2dbd96c..0f33b3c 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -26,6 +26,26 @@ #include "pci.h" +/** + * pci_resource_alignment - calculate reassign resource's alignment + * @dev: pci device pointer + * @res: resource pointer + * + * Returns alignment on success, 0 (invalid alignment) on failure. 
+ */ +resource_size_t +pci_resource_alignment(struct pci_dev *dev, struct resource *res) +{ + resource_size_t r_size = resource_alignment(res); + /* A zero alignment is returned unchanged; otherwise memory resources of reassigndev devices are rounded up to a multiple of PAGE_SIZE */ + if (r_size) + if (is_reassigndev(dev) && + (res->flags & IORESOURCE_MEM)) + r_size = ALIGN(r_size, PAGE_SIZE); + + return r_size; +} + void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno) { struct pci_bus_region region; @@ -122,6 +142,21 @@ int pci_claim_resource(struct pci_dev *dev, int resource) return err; } +#ifdef CONFIG_PCI_REASSIGN +void pci_disable_bridge_window(struct pci_dev *dev) +{ + dev_dbg(&dev->dev, "Disable bridge window\n"); + /* Rewrites the bridge's memory and prefetchable-memory window registers in config space. NOTE(review): the values appear chosen so that each base lies above its limit, which would close the window - confirm against the PCI-to-PCI bridge specification. */ + /* MMIO Base/Limit: a single dword write at PCI_MEMORY_BASE, presumably covering both the base and the limit register */ + pci_write_config_dword(dev, PCI_MEMORY_BASE, 0x0000fff0); + + /* Prefetchable MMIO Base/Limit: upper limit cleared first, then base/limit, then upper base set to all ones */ + pci_write_config_dword(dev, PCI_PREF_LIMIT_UPPER32, 0); + pci_write_config_dword(dev, PCI_PREF_MEMORY_BASE, 0x0000fff0); + pci_write_config_dword(dev, PCI_PREF_BASE_UPPER32, 0xffffffff); +} +#endif /* CONFIG_PCI_REASSIGN */ + int pci_assign_resource(struct pci_dev *dev, int resno) { struct pci_bus *bus = dev->bus; @@ -132,7 +167,7 @@ int pci_assign_resource(struct pci_dev *dev, int resno) size = resource_size(res); min = (res->flags & IORESOURCE_IO) ? 
PCIBIOS_MIN_IO : PCIBIOS_MIN_MEM; - align = resource_alignment(res); + align = pci_resource_alignment(dev, res); if (!align) { dev_err(&dev->dev, "BAR %d: can't allocate resource (bogus " "alignment) %pR flags %#lx\n", @@ -162,6 +197,12 @@ int pci_assign_resource(struct pci_dev *dev, int resno) } else { res->flags &= ~IORESOURCE_STARTALIGN; + /* Debug log only; pci_update_resource() below keeps its own resno guard */ + if (resno < PCI_BRIDGE_RESOURCES && is_reassigndev(dev)) + dev_dbg(&dev->dev, "BAR %d: assign resource " + "[%#llx - %#llx]\n", resno, + (unsigned long long)res->start, + (unsigned long long)res->end); if (resno < PCI_BRIDGE_RESOURCES) pci_update_resource(dev, res, resno); } @@ -223,7 +264,7 @@ void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head) if (!(r->flags) || r->parent) continue; - r_align = resource_alignment(r); + r_align = pci_resource_alignment(dev, r); if (!r_align) { dev_warn(&dev->dev, "BAR %d: bogus alignment " "%pR flags %#lx\n", @@ -235,7 +276,8 @@ void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head) struct resource_list *ln = list->next; if (ln) - align = resource_alignment(ln->res); + align = pci_resource_alignment(ln->dev, + ln->res); if (r_align > align) { tmp = kmalloc(sizeof(*tmp), GFP_KERNEL); -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html