This patch adds a feature to Linux that reassigns page-aligned memory
resources to specified PCI devices.

I created this patch for Xen's dom0 Linux. It is useful when an I/O device
is assigned to an HVM domain using PCI passthrough, because PCI passthrough
requires page-aligned memory resources. I have heard that it is also useful
for KVM, so I am submitting it to the linux-pci mailing list.

On many systems, the BIOS assigns memory resources to a device and enables
it. This patch therefore disables the device and releases its memory
resources, and then assigns page-aligned memory resources to the device.

To reassign page-aligned memory resources to a device, add a boot parameter
to the Linux command line as follows.

	reassigndev=00:1d.7,01:00.0

	reassigndev=	Specifies the devices whose memory resources are to be
			reassigned page-aligned. A PCI-PCI bridge can also be
			specified if its resource windows need to be expanded.

You can easily change the way the devices to reassign are specified by
modifying the code in reassigndev.c.

Thanks,
--
Yuji Shimada

Signed-off-by: Yuji Shimada <shimada-yxb@xxxxxxxxxxxxxxx>

diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index e1ca425..202d3d2 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -32,6 +32,20 @@ config PCI_LEGACY
 	  option serves to include/exclude only a few drivers that are
 	  still using this API.
 
+config PCI_REASSIGN
+	bool "Enable reassign page-aligned memory resources to device"
+	depends on PCI
+	default y
+	help
+	  Say Y here if you want to reassign page-aligned memory resources to
+	  the device. And add boot parameters of linux as follows.
+
+	  reassigndev=00:1d.7,01:00.0
+
+	  "reassigndev=" specifies devices to reassign page-aligned memory
+	  resources. PCI-PCI bridge can be specified, if resource windows need
+	  to be expanded.
+ config PCI_DEBUG bool "PCI Debugging" depends on PCI && DEBUG_KERNEL diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index af3bfe2..50c6b35 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile @@ -6,6 +6,7 @@ obj-y += access.o bus.o probe.o remove.o pci.o quirks.o slot.o \ pci-driver.o search.o pci-sysfs.o rom.o setup-res.o \ irq.o obj-$(CONFIG_PROC_FS) += proc.o +obj-$(CONFIG_PCI_REASSIGN) += reassigndev.o # Build PCI Express stuff if needed obj-$(CONFIG_PCIEPORTBUS) += pcie/ diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 9de87e9..4b51f3f 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -171,4 +171,11 @@ static inline int pci_ari_enabled(struct pci_dev *dev) return dev->ari_enabled; } +#ifdef CONFIG_PCI_REASSIGN +extern int is_reassigndev(struct pci_dev *dev); +#else +#define is_reassigndev(dev) 0 +#endif +extern void pci_disable_bridge_window(struct pci_dev *dev); + #endif /* DRIVERS_PCI_H */ diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 96cf8ec..af2ae0e 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -31,6 +31,53 @@ EXPORT_SYMBOL(pci_pci_problems); int pcie_mch_quirk; EXPORT_SYMBOL(pcie_mch_quirk); +/* + * This quirk function disables the device and releases resources + * which is specified by kernel's boot parameter 'reassigndev'. + * Later on, kernel will assign page-aligned memory resource back + * to that device. 
+ */
+static void __devinit quirk_release_resources(struct pci_dev *dev)
+{
+	struct resource *r;
+	u16 command;
+	int i;
+
+	if (!is_reassigndev(dev))
+		return;
+
+	/* PCI Host Bridge isn't a target device */
+	if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL &&
+	    (dev->class >> 8) == PCI_CLASS_BRIDGE_HOST)
+		return;
+
+	printk(KERN_INFO "PCI: Disable device and release resources [%s].\n",
+		pci_name(dev));
+	/* Header fixups run before anything enables the device, so
+	 * pci_disable_device() would only unbalance its enable count;
+	 * turn off memory decoding directly instead.
+	 */
+	pci_read_config_word(dev, PCI_COMMAND, &command);
+	command &= ~PCI_COMMAND_MEMORY;
+	pci_write_config_word(dev, PCI_COMMAND, command);
+
+	/* keep each memory resource's size, drop its current address */
+	for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+		r = &dev->resource[i];
+		if (!(r->flags & IORESOURCE_MEM))
+			continue;
+		r->end -= r->start;
+		r->start = 0;
+		if (i < PCI_BRIDGE_RESOURCES)
+			pci_update_resource(dev, r, i);
+	}
+	/* close the bridge windows so that new ones can be assigned later */
+	if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE &&
+	    (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI)
+		pci_disable_bridge_window(dev);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, quirk_release_resources);
+
 #ifdef CONFIG_PCI_QUIRKS
 /* The Mellanox Tavor device gives false positive parity errors
  * Mark this device with a broken_parity_status, to allow
diff --git a/drivers/pci/reassigndev.c b/drivers/pci/reassigndev.c
new file mode 100644
index 0000000..ea19481
--- /dev/null
+++ b/drivers/pci/reassigndev.c
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2008, NEC Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include "pci.h"
+
+
+#define REASSIGNDEV_PARAM_MAX	(2048)
+#define TOKEN_MAX		(12)	/* "SSSS:BB:DD.F" length is 12 */
+
+static char param_reassigndev[REASSIGNDEV_PARAM_MAX] = {0};
+
+/* Boot-time handler: keep a private copy of the "reassigndev=" list. */
+static int __init reassigndev_setup(char *str)
+{
+	snprintf(param_reassigndev, sizeof(param_reassigndev), "%s", str);
+	return 1;
+}
+__setup("reassigndev=", reassigndev_setup);
+
+/**
+ * is_reassigndev - check whether a device is listed in "reassigndev="
+ * @dev: PCI device to look up
+ *
+ * The parameter is a comma-separated list of "[SSSS:]BB:DD.F" tokens in
+ * hexadecimal; the segment is 0 when omitted.  Empty, overlong or
+ * unparsable tokens are skipped.
+ *
+ * Returns 1 if @dev matches one of the tokens, 0 otherwise.
+ */
+int is_reassigndev(struct pci_dev *dev)
+{
+	/* unsigned: sscanf's %x stores through unsigned int pointers */
+	unsigned int seg, bus, slot, func;
+	char dev_str[TOKEN_MAX + 1];
+	const char *p, *sep;
+	size_t len;
+
+	for (p = param_reassigndev; p; p = sep ? sep + 1 : NULL) {
+		sep = strchr(p, ',');
+		len = sep ? (size_t)(sep - p) : strlen(p);
+		if (len == 0 || len > TOKEN_MAX)
+			continue;
+
+		memcpy(dev_str, p, len);
+		dev_str[len] = '\0';
+
+		if (sscanf(dev_str, "%x:%x:%x.%x",
+			   &seg, &bus, &slot, &func) != 4) {
+			/* no segment given: retry as "BB:DD.F", segment 0 */
+			seg = 0;
+			if (sscanf(dev_str, "%x:%x.%x",
+				   &bus, &slot, &func) != 3)
+				continue;	/* failed to scan strings */
+		}
+		if (seg == pci_domain_nr(dev->bus) &&
+		    bus == dev->bus->number &&
+		    slot == PCI_SLOT(dev->devfn) &&
+		    func == PCI_FUNC(dev->devfn))
+			return 1;	/* It's a target device */
+	}
+
+	return 0;
+}
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index ea979f2..96c0b3e 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -25,6 +25,7 @@
 #include <linux/ioport.h>
 #include <linux/cache.h>
 #include <linux/slab.h>
+#include "pci.h"
 
 
 static void pbus_assign_resources_sorted(struct pci_bus *bus)
@@ -343,6 +344,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long
list_for_each_entry(dev, &bus->devices, bus_list) { int i; + int reassign = is_reassigndev(dev); for (i = 0; i < PCI_NUM_RESOURCES; i++) { struct resource *r = &dev->resource[i]; @@ -351,6 +353,8 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long if (r->parent || (r->flags & mask) != type) continue; r_size = resource_size(r); + if (reassign) + r_size = ALIGN(r_size, PAGE_SIZE); /* For bridges size != alignment */ align = resource_alignment(r); order = __ffs(align) - 20; diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 2dbd96c..111e5d2 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -122,17 +122,35 @@ int pci_claim_resource(struct pci_dev *dev, int resource) return err; } +void pci_disable_bridge_window(struct pci_dev *dev) +{ + dev_dbg(&dev->dev, "Disable bridge window\n"); + + /* MMIO Base/Limit */ + pci_write_config_dword(dev, PCI_MEMORY_BASE, 0x0000fff0); + + /* Prefetchable MMIO Base/Limit */ + pci_write_config_dword(dev, PCI_PREF_LIMIT_UPPER32, 0); + pci_write_config_dword(dev, PCI_PREF_MEMORY_BASE, 0x0000fff0); + pci_write_config_dword(dev, PCI_PREF_BASE_UPPER32, 0xffffffff); +} + int pci_assign_resource(struct pci_dev *dev, int resno) { struct pci_bus *bus = dev->bus; struct resource *res = dev->resource + resno; resource_size_t size, min, align; int ret; + int reassigndev = is_reassigndev(dev); size = resource_size(res); min = (res->flags & IORESOURCE_IO) ? 
PCIBIOS_MIN_IO : PCIBIOS_MIN_MEM; align = resource_alignment(res); + if ((reassigndev) && + (res->flags & IORESOURCE_MEM)) { + align = ALIGN(align, PAGE_SIZE); + } if (!align) { dev_err(&dev->dev, "BAR %d: can't allocate resource (bogus " "alignment) %pR flags %#lx\n", @@ -162,6 +180,11 @@ int pci_assign_resource(struct pci_dev *dev, int resno) } else { res->flags &= ~IORESOURCE_STARTALIGN; if (resno < PCI_BRIDGE_RESOURCES) + if (reassigndev) + dev_dbg(&dev->dev, "BAR %d: assign resource " + "[%#llx - %#llx\n", resno, + (unsigned long long)res->start, + (unsigned long long)res->end); pci_update_resource(dev, res, resno); } -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html