[PATCH v2] PCI: Reassign page-aligned memory resources to device for pci passthrough

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This patch adds a function that reassigns page-aligned memory
resources to a device. This version incorporates the feedback on the
previous version.


I created this patch for Xen's dom0 Linux, where it has already been
included. It is useful when assigning an I/O device to an HVM domain
using PCI passthrough, because PCI passthrough requires page-aligned
memory resources. It is also useful for KVM, so I am submitting it to
the linux-pci mailing list.

Actually, there are similar patches for SR-IOV support. 

    http://markmail.org/message/7frtyaq2rwiwt7fa
    http://markmail.org/message/7w6ebnryu6iob6nf
    http://markmail.org/message/trpfntlodhw7wfre

My patch is simpler than those, because it needs only one boot
parameter.


On many systems, the BIOS assigns memory resources to the device and
enables it. So my patch disables the device and releases its memory
resources. Page-aligned memory resources are then assigned to the device.

To reassign page-aligned memory resources to a device, add the
following Linux boot parameter.

	pci=pagealignmem=00:1d.7;01:00.0

		pagealignmem=[ssss:]bb:dd.f[; ...]	Specifies the devices
				to which page-aligned memory resources are
				reassigned. A PCI-PCI bridge can be specified
				if its resource windows need to be expanded.

Thanks,
--
Yuji Shimada


Signed-off-by: Yuji Shimada <shimada-yxb@xxxxxxxxxxxxxxx>

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index c86c074..fddd619 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1704,6 +1704,10 @@ and is between 256 and 4096 characters. It is defined in the file
 		cbmemsize=nn[KMG]	The fixed amount of bus space which is
 				reserved for the CardBus bridge's memory
 				window. The default value is 64 megabytes.
+		pagealignmem=[ssss:]bb:dd.f[; ...]	Specifies the devices
+				to which page-aligned memory resources are
+				reassigned. A PCI-PCI bridge can be specified
+				if its resource windows need to be expanded.
 
 	pcie_aspm=	[PCIE] Forcibly enable or disable PCIe Active State Power
 			Management.
diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index e1ca425..64ef8ca 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -50,3 +50,8 @@ config HT_IRQ
 	   This allows native hypertransport devices to use interrupts.
 
 	   If unsure say Y.
+
+config PAGEALIGNMEM_PARAM
+	bool
+	depends on PCI_QUIRKS
+	default y
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 21f2ac6..cf5d7c5 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -2022,6 +2022,70 @@ int pci_select_bars(struct pci_dev *dev, unsigned long flags)
 	return bars;
 }
 
+#ifdef CONFIG_PAGEALIGNMEM_PARAM
+#define	TOKEN_MAX	(12)	/* "SSSS:BB:DD.F" length is 12 */
+#define	PAGEALIGNMEM_PARAM_MAX	(2048)	/* buffer size, including the NUL */
+static char param_pagealignmem[PAGEALIGNMEM_PARAM_MAX] = {0}; /* option value */
+
+/**
+ * is_reassigndev - test whether a device is named by "pci=pagealignmem="
+ * @dev: the PCI device to look up
+ *
+ * RETURNS: 1 if @dev matches one of the ';'-separated "[ssss:]bb:dd.f"
+ *          tokens of the boot parameter, 0 otherwise.
+ */
+int is_reassigndev(struct pci_dev *dev)
+{
+	char token[TOKEN_MAX+1];
+	int seg, bus, slot, func;
+	int n;
+	char *cur, *sep;
+
+	cur = param_pagealignmem;
+	for (;;) {
+		/* a token runs up to the next ';' or to the end of the string */
+		sep = strpbrk(cur, ";");
+		if (sep)
+			n = sep - cur;
+		else
+			n = strlen(cur);
+		/* empty or over-long tokens cannot name a device: skip them */
+		if (n <= 0 || n > TOKEN_MAX)
+			goto next;
+
+		memcpy(token, cur, n);
+		token[n] = '\0';
+
+		/* try "ssss:bb:dd.f" first, then "bb:dd.f" in segment 0 */
+		if (sscanf(token, "%x:%x:%x.%x",
+			   &seg, &bus, &slot, &func) != 4) {
+			if (sscanf(token, "%x:%x.%x",
+				   &bus, &slot, &func) != 3)
+				seg = bus = -1;	/* failed to parse */
+			else
+				seg = 0;
+		}
+		if (seg == pci_domain_nr(dev->bus) &&
+		    bus == dev->bus->number &&
+		    slot == PCI_SLOT(dev->devfn) &&
+		    func == PCI_FUNC(dev->devfn))
+			return 1;
+next:
+		if (!sep)
+			break;
+		cur = sep + 1;
+	}
+
+	return 0;
+}
+
+static void __devinit pci_pagealignmem_setup(char *str)
+{
+	/* Keep a private, always NUL-terminated copy; over-long input is cut. */
+	strlcpy(param_pagealignmem, str, sizeof(param_pagealignmem));
+}
+#endif  /* CONFIG_PAGEALIGNMEM_PARAM */
+
 static void __devinit pci_no_domains(void)
 {
 #ifdef CONFIG_PCI_DOMAINS
@@ -2059,6 +2123,8 @@ static int __devinit pci_setup(char *str)
 				pci_cardbus_io_size = memparse(str + 9, &str);
 			} else if (!strncmp(str, "cbmemsize=", 10)) {
 				pci_cardbus_mem_size = memparse(str + 10, &str);
+			} else if (!strncmp(str, "pagealignmem=", 13)) {
+				pci_pagealignmem_setup(str + 13);
 			} else {
 				printk(KERN_ERR "PCI: Unknown option `%s'\n",
 						str);
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 9de87e9..9d28dba 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -171,4 +171,13 @@ static inline int pci_ari_enabled(struct pci_dev *dev)
 	return dev->ari_enabled;
 }
 
+#ifdef CONFIG_PCI_QUIRKS
+extern int is_reassigndev(struct pci_dev *dev);
+extern void pci_disable_bridge_window(struct pci_dev *dev);
+#else
+static inline int is_reassigndev(struct pci_dev *dev) { return 0; }
+#endif
+extern resource_size_t
+	pci_resource_alignment(struct pci_dev *dev, struct resource *res);
+
 #endif /* DRIVERS_PCI_H */
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 5049a47..84c9825 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -32,6 +32,56 @@ int pcie_mch_quirk;
 EXPORT_SYMBOL(pcie_mch_quirk);
 
 #ifdef CONFIG_PCI_QUIRKS
+/*
+ * Header fixup for the devices named by kernel's boot parameter
+ * 'pci=pagealignmem=': turn off their memory decoding and release the
+ * memory resources they currently hold.  Later on, kernel will assign
+ * page-aligned memory resources back to such a device.
+ */
+static void __devinit quirk_release_resources(struct pci_dev *dev)
+{
+	int i;
+	u16 command;
+	struct resource *r;
+
+	if (!is_reassigndev(dev))
+		return;
+
+	if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL &&
+	    (dev->class >> 8) == PCI_CLASS_BRIDGE_HOST) {
+		/* PCI Host Bridge isn't a target device */
+		printk(KERN_WARNING
+			"PCI: Can't reassign resources to Host Bridge[%s].\n",
+			pci_name(dev));
+		return;
+	}
+
+	printk(KERN_INFO "PCI: Disable device and release resources [%s].\n",
+		pci_name(dev));
+	/* Nothing has enabled the device yet, so pci_disable_device() would
+	 * be unbalanced here; clear the memory decode bit directly instead.
+	 */
+	pci_read_config_word(dev, PCI_COMMAND, &command);
+	command &= ~PCI_COMMAND_MEMORY;
+	pci_write_config_word(dev, PCI_COMMAND, command);
+
+	for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+		r = &dev->resource[i];
+		if (!(r->flags & IORESOURCE_MEM))
+			continue;
+		/* keep the size but forget the current address */
+		r->end = r->end - r->start;
+		r->start = 0;
+		if (i < PCI_BRIDGE_RESOURCES)
+			pci_update_resource(dev, r, i);
+	}
+	/* close the bridge windows so that they can be reassigned later on */
+	if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE &&
+	    (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI)
+		pci_disable_bridge_window(dev);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, quirk_release_resources);
+
 /* The Mellanox Tavor device gives false positive parity errors
  * Mark this device with a broken_parity_status, to allow
  * PCI scanning code to "skip" this now blacklisted device.
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index ea979f2..767fc17 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -25,6 +25,7 @@
 #include <linux/ioport.h>
 #include <linux/cache.h>
 #include <linux/slab.h>
+#include "pci.h"
 
 
 static void pbus_assign_resources_sorted(struct pci_bus *bus)
@@ -343,6 +344,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long
 
 	list_for_each_entry(dev, &bus->devices, bus_list) {
 		int i;
+		int reassign = is_reassigndev(dev);
 		
 		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
 			struct resource *r = &dev->resource[i];
@@ -351,8 +353,10 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long
 			if (r->parent || (r->flags & mask) != type)
 				continue;
 			r_size = resource_size(r);
+			if (reassign)
+				r_size = ALIGN(r_size, PAGE_SIZE);
 			/* For bridges size != alignment */
-			align = resource_alignment(r);
+			align = pci_resource_alignment(dev, r);
 			order = __ffs(align) - 20;
 			if (order > 11) {
 				dev_warn(&dev->dev, "BAR %d bad alignment %llx: "
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
index 2dbd96c..011d915 100644
--- a/drivers/pci/setup-res.c
+++ b/drivers/pci/setup-res.c
@@ -26,6 +26,26 @@
 #include "pci.h"
 
 
+/**
+ * pci_resource_alignment - alignment to use when assigning a resource
+ * @dev: PCI device that owns the resource
+ * @res: resource whose alignment is wanted
+ *
+ * Returns resource_alignment() of @res, rounded up to a multiple of PAGE_SIZE
+ * for memory resources of "pci=pagealignmem=" devices; 0 means invalid.
+ */
+resource_size_t
+pci_resource_alignment(struct pci_dev *dev, struct resource *res)
+{
+	resource_size_t align = resource_alignment(res);
+
+	if (!align)
+		return 0;
+	if ((res->flags & IORESOURCE_MEM) && is_reassigndev(dev))
+		return ALIGN(align, PAGE_SIZE);
+	return align;
+}
+
 void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno)
 {
 	struct pci_bus_region region;
@@ -122,6 +142,21 @@ int pci_claim_resource(struct pci_dev *dev, int resource)
 	return err;
 }
 
+#ifdef CONFIG_PCI_QUIRKS
+void pci_disable_bridge_window(struct pci_dev *dev)
+{
+	dev_dbg(&dev->dev, "Disable bridge window\n");
+	/* NOTE(review): values look chosen so that base > limit -- confirm */
+ 	/* MMIO Base/Limit */
+ 	pci_write_config_dword(dev, PCI_MEMORY_BASE, 0x0000fff0);
+  
+ 	/* Prefetchable MMIO Base/Limit */
+ 	pci_write_config_dword(dev, PCI_PREF_LIMIT_UPPER32, 0);
+ 	pci_write_config_dword(dev, PCI_PREF_MEMORY_BASE, 0x0000fff0);
+ 	pci_write_config_dword(dev, PCI_PREF_BASE_UPPER32, 0xffffffff);
+}
+#endif	/* CONFIG_PCI_QUIRKS */
+
 int pci_assign_resource(struct pci_dev *dev, int resno)
 {
 	struct pci_bus *bus = dev->bus;
@@ -132,7 +167,7 @@ int pci_assign_resource(struct pci_dev *dev, int resno)
 	size = resource_size(res);
 	min = (res->flags & IORESOURCE_IO) ? PCIBIOS_MIN_IO : PCIBIOS_MIN_MEM;
 
-	align = resource_alignment(res);
+	align = pci_resource_alignment(dev, res);
 	if (!align) {
 		dev_err(&dev->dev, "BAR %d: can't allocate resource (bogus "
 			"alignment) %pR flags %#lx\n",
@@ -162,6 +197,11 @@ int pci_assign_resource(struct pci_dev *dev, int resno)
 	} else {
 		res->flags &= ~IORESOURCE_STARTALIGN;
+		if (resno < PCI_BRIDGE_RESOURCES && is_reassigndev(dev))
+			dev_dbg(&dev->dev, "BAR %d: assign resource "
+				"[%#llx - %#llx]\n", resno,
+				 (unsigned long long)res->start,
+				 (unsigned long long)res->end);
 		if (resno < PCI_BRIDGE_RESOURCES)
 			pci_update_resource(dev, res, resno);
 	}
 
@@ -223,7 +263,7 @@ void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head)
 		if (!(r->flags) || r->parent)
 			continue;
 
-		r_align = resource_alignment(r);
+		r_align = pci_resource_alignment(dev, r);
 		if (!r_align) {
 			dev_warn(&dev->dev, "BAR %d: bogus alignment "
 				"%pR flags %#lx\n",
@@ -235,7 +275,8 @@ void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head)
 			struct resource_list *ln = list->next;
 
 			if (ln)
-				align = resource_alignment(ln->res);
+				align = pci_resource_alignment(ln->dev,
+								ln->res);
 
 			if (r_align > align) {
 				tmp = kmalloc(sizeof(*tmp), GFP_KERNEL);

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [DMA Engine]     [Linux Coverity]     [Linux USB]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [Greybus]

  Powered by Linux