[RESEND][PATCH] PCI: Reassign page-aligned memory resources to device for pci passthrough.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



As I have not received a reply to my mail of November 6, 2008, I am
resending it herewith.

This patch adds to Linux a function that reassigns page-aligned memory
resources to a device.

I created this patch for Xen's dom0 Linux, where it has already been
included. It is useful when an I/O device is assigned to an HVM domain
using PCI passthrough, because PCI passthrough requires page-aligned
memory resources. It is also useful for KVM, so I am submitting it to
the linux-pci mailing list.

Actually, there are similar patches for SR-IOV support. 

    http://markmail.org/message/7frtyaq2rwiwt7fa
    http://markmail.org/message/7w6ebnryu6iob6nf
    http://markmail.org/message/trpfntlodhw7wfre

My patch is simpler than those, because it needs only one boot
parameter.


On many systems, the BIOS assigns memory resources to the device and
enables it. So my patch disables the device and releases its memory
resources. Then page-aligned memory resources are assigned to the device.

To reassign page-aligned memory resources to a device, add a kernel boot
parameter as follows.

	reassigndev=00:1d.7,01:00.0

	reassigndev=	Specifies the devices to which page-aligned
			memory resources are reassigned.  A PCI-PCI
			bridge can be specified if its resource windows
			need to be expanded.

You can easily change the way devices are specified by modifying
reassigndev.c.

Thanks,
--
Yuji Shimada


Signed-off-by: Yuji Shimada <shimada-yxb@xxxxxxxxxxxxxxx>

diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index e1ca425..e85896e 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -32,6 +32,20 @@ config PCI_LEGACY
 	  option serves to include/exclude only a few drivers that are
 	  still using this API.
 
+config PCI_REASSIGN
+	bool "Enable reassign page-aligned memory resources to device"
+	depends on PCI
+	default y
+	help
+	  Say Y here if you want to be able to reassign page-aligned memory
+	  resources to devices.  Then add a kernel boot parameter such as:
+
+	  reassigndev=00:1d.7,01:00.0
+
+	  "reassigndev=" specifies the devices to which page-aligned memory
+	  resources are reassigned.  A PCI-PCI bridge can be specified if its
+	  resource windows need to be expanded.
+
 config PCI_DEBUG
 	bool "PCI Debugging"
 	depends on PCI && DEBUG_KERNEL
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index af3bfe2..50c6b35 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -6,6 +6,7 @@ obj-y		+= access.o bus.o probe.o remove.o pci.o quirks.o slot.o \
 			pci-driver.o search.o pci-sysfs.o rom.o setup-res.o \
 			irq.o
 obj-$(CONFIG_PROC_FS) += proc.o
+obj-$(CONFIG_PCI_REASSIGN) += reassigndev.o
 
 # Build PCI Express stuff if needed
 obj-$(CONFIG_PCIEPORTBUS) += pcie/
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 9de87e9..0b95700 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -171,4 +171,13 @@ static inline int pci_ari_enabled(struct pci_dev *dev)
 	return dev->ari_enabled;
 }
 
+#ifdef CONFIG_PCI_REASSIGN
+extern int is_reassigndev(struct pci_dev *dev);
+extern void pci_disable_bridge_window(struct pci_dev *dev);
+#else	/* without CONFIG_PCI_REASSIGN no device is a reassign target */
+static inline int is_reassigndev(struct pci_dev *dev) { return 0; }
+#endif
+extern resource_size_t
+	pci_resource_alignment(struct pci_dev *dev, struct resource *res);
+
 #endif /* DRIVERS_PCI_H */
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index bbf66ea..e3607f8 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -31,6 +31,54 @@ EXPORT_SYMBOL(pci_pci_problems);
 int pcie_mch_quirk;
 EXPORT_SYMBOL(pcie_mch_quirk);
 
+#ifdef CONFIG_PCI_REASSIGN
+/*
+ * Header fixup for devices named by the 'reassigndev=' boot parameter:
+ * stop memory decoding, rebase every memory resource to 0 (size kept)
+ * and push that to the device, so that the kernel later assigns
+ * page-aligned memory resources.  Host bridges are never touched.
+ */
+static void __devinit quirk_release_resources(struct pci_dev *dev)
+{
+	int i;
+	u16 cmd;
+	struct resource *r;
+
+	if (!is_reassigndev(dev))
+		return;
+
+	/* PCI Host Bridge isn't a target device */
+	if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL &&
+	    (dev->class >> 8) == PCI_CLASS_BRIDGE_HOST)
+		return;
+
+	printk(KERN_INFO "PCI: Disable device and release resources [%s].\n",
+		pci_name(dev));
+
+	/* pci_disable_device() must pair with pci_enable_device(), which
+	 * nobody has called yet; clear memory decoding directly instead.
+	 */
+	pci_read_config_word(dev, PCI_COMMAND, &cmd);
+	pci_write_config_word(dev, PCI_COMMAND, cmd & ~PCI_COMMAND_MEMORY);
+
+	for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+		r = &dev->resource[i];
+		if (!(r->flags & IORESOURCE_MEM))
+			continue;
+		r->end = r->end - r->start;
+		r->start = 0;
+		if (i < PCI_BRIDGE_RESOURCES)
+			pci_update_resource(dev, r, i);
+	}
+
+	/* disable the bridge's windows so they can be reassigned later on */
+	if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE &&
+	    (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI)
+		pci_disable_bridge_window(dev);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, quirk_release_resources);
+#endif  /* CONFIG_PCI_REASSIGN */
+
 #ifdef CONFIG_PCI_QUIRKS
 /* The Mellanox Tavor device gives false positive parity errors
  * Mark this device with a broken_parity_status, to allow
diff --git a/drivers/pci/reassigndev.c b/drivers/pci/reassigndev.c
new file mode 100644
index 0000000..ea19481
--- /dev/null
+++ b/drivers/pci/reassigndev.c
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2008, NEC Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include "pci.h"
+
+
+#define	REASSIGNDEV_PARAM_MAX	(2048)
+#define	TOKEN_MAX	(12)	/* "SSSS:BB:DD.F" length is 12 */
+
+static char param_reassigndev[REASSIGNDEV_PARAM_MAX] = {0};
+
+/* Boot-time handler: keep a NUL-terminated copy of the "reassigndev=" list. */
+static int __init reassigndev_setup(char *str)
+{
+	strlcpy(param_reassigndev, str, sizeof(param_reassigndev));
+	return 1;
+}
+__setup("reassigndev=", reassigndev_setup);
+
+/**
+ * is_reassigndev - check whether a device is listed in "reassigndev="
+ * @dev: PCI device to look up
+ *
+ * The saved parameter is scanned as a comma-separated list of hexadecimal
+ * "[SSSS:]BB:DD.F" tokens; the segment defaults to 0 when omitted.
+ * Empty, over-long and unparsable tokens are skipped.
+ *
+ * Returns 1 if @dev matches one of the tokens, 0 otherwise.
+ */
+int is_reassigndev(struct pci_dev *dev)
+{
+	char dev_str[TOKEN_MAX + 1];
+	unsigned int seg, bus, slot, func;	/* %x stores unsigned int */
+	const char *p, *next_str;
+	size_t len;
+
+	for (p = param_reassigndev; p; p = next_str ? next_str + 1 : NULL) {
+		next_str = strchr(p, ',');
+		len = next_str ? (size_t)(next_str - p) : strlen(p);
+		if (len == 0 || len > TOKEN_MAX)
+			continue;
+
+		memcpy(dev_str, p, len);
+		dev_str[len] = '\0';
+
+		/* try "SSSS:BB:DD.F" first, then "BB:DD.F" in segment 0 */
+		if (sscanf(dev_str, "%x:%x:%x.%x",
+			   &seg, &bus, &slot, &func) != 4) {
+			seg = 0;
+			if (sscanf(dev_str, "%x:%x.%x",
+				   &bus, &slot, &func) != 3)
+				continue;	/* failed to scan string */
+		}
+
+		if (seg == pci_domain_nr(dev->bus) &&
+		    bus == dev->bus->number &&
+		    slot == PCI_SLOT(dev->devfn) &&
+		    func == PCI_FUNC(dev->devfn))
+			return 1;	/* It's a target device */
+	}
+
+	return 0;
+}
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index ea979f2..767fc17 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -25,6 +25,7 @@
 #include <linux/ioport.h>
 #include <linux/cache.h>
 #include <linux/slab.h>
+#include "pci.h"
 
 
 static void pbus_assign_resources_sorted(struct pci_bus *bus)
@@ -343,6 +344,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long
 
 	list_for_each_entry(dev, &bus->devices, bus_list) {
 		int i;
+		int reassign = is_reassigndev(dev);
 		
 		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
 			struct resource *r = &dev->resource[i];
@@ -351,8 +353,10 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long
 			if (r->parent || (r->flags & mask) != type)
 				continue;
 			r_size = resource_size(r);
+			if (reassign)
+				r_size = ALIGN(r_size, PAGE_SIZE);
 			/* For bridges size != alignment */
-			align = resource_alignment(r);
+			align = pci_resource_alignment(dev, r);
 			order = __ffs(align) - 20;
 			if (order > 11) {
 				dev_warn(&dev->dev, "BAR %d bad alignment %llx: "
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
index 2dbd96c..0f33b3c 100644
--- a/drivers/pci/setup-res.c
+++ b/drivers/pci/setup-res.c
@@ -26,6 +26,26 @@
 #include "pci.h"
 
 
+/**
+ * pci_resource_alignment - alignment to use when assigning a resource
+ * @dev: PCI device that owns @res
+ * @res: resource whose alignment is wanted
+ *
+ * Returns resource_alignment(@res), rounded up to PAGE_SIZE for memory
+ * resources of devices accepted by is_reassigndev(); 0 means invalid.
+ */
+resource_size_t
+pci_resource_alignment(struct pci_dev *dev, struct resource *res)
+{
+	resource_size_t align = resource_alignment(res);
+
+	if (!align)
+		return 0;
+	if (!is_reassigndev(dev) || !(res->flags & IORESOURCE_MEM))
+		return align;
+	return ALIGN(align, PAGE_SIZE);
+}
+
 void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno)
 {
 	struct pci_bus_region region;
@@ -122,6 +142,21 @@ int pci_claim_resource(struct pci_dev *dev, int resource)
 	return err;
 }
 
+#ifdef CONFIG_PCI_REASSIGN
+/* Disable the MMIO and prefetchable MMIO windows of a bridge. */
+void pci_disable_bridge_window(struct pci_dev *dev)
+{
+	dev_dbg(&dev->dev, "Disable bridge window\n");
+	/* MMIO Base/Limit */
+	pci_write_config_dword(dev, PCI_MEMORY_BASE, 0x0000fff0);
+
+	/* Prefetchable MMIO Base/Limit */
+	pci_write_config_dword(dev, PCI_PREF_LIMIT_UPPER32, 0);
+	pci_write_config_dword(dev, PCI_PREF_MEMORY_BASE, 0x0000fff0);
+	pci_write_config_dword(dev, PCI_PREF_BASE_UPPER32, 0xffffffff);
+}
+#endif	/* CONFIG_PCI_REASSIGN */
+
 int pci_assign_resource(struct pci_dev *dev, int resno)
 {
 	struct pci_bus *bus = dev->bus;
@@ -132,7 +167,7 @@ int pci_assign_resource(struct pci_dev *dev, int resno)
 	size = resource_size(res);
 	min = (res->flags & IORESOURCE_IO) ? PCIBIOS_MIN_IO : PCIBIOS_MIN_MEM;
 
-	align = resource_alignment(res);
+	align = pci_resource_alignment(dev, res);
 	if (!align) {
 		dev_err(&dev->dev, "BAR %d: can't allocate resource (bogus "
 			"alignment) %pR flags %#lx\n",
@@ -162,6 +197,11 @@ int pci_assign_resource(struct pci_dev *dev, int resno)
 	} else {
 		res->flags &= ~IORESOURCE_STARTALIGN;
 		if (resno < PCI_BRIDGE_RESOURCES)
+			if (is_reassigndev(dev))
+				dev_dbg(&dev->dev, "BAR %d: assign resource "
+					"[%#llx - %#llx]\n", resno,
+					 (unsigned long long)res->start,
+					 (unsigned long long)res->end);
 			pci_update_resource(dev, res, resno);
 	}
 
@@ -223,7 +263,7 @@ void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head)
 		if (!(r->flags) || r->parent)
 			continue;
 
-		r_align = resource_alignment(r);
+		r_align = pci_resource_alignment(dev, r);
 		if (!r_align) {
 			dev_warn(&dev->dev, "BAR %d: bogus alignment "
 				"%pR flags %#lx\n",
@@ -235,7 +275,8 @@ void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head)
 			struct resource_list *ln = list->next;
 
 			if (ln)
-				align = resource_alignment(ln->res);
+				align = pci_resource_alignment(ln->dev,
+								ln->res);
 
 			if (r_align > align) {
 				tmp = kmalloc(sizeof(*tmp), GFP_KERNEL);

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [DMA Engine]     [Linux Coverity]     [Linux USB]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [Greybus]

  Powered by Linux