[RFC PATCH] kvm: Simple stub driver with AER capabilities

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The KVM device-assignment implementation doesn't provide any mechanism to
report PCI errors (related to the assigned device) to the guest VM.
Similarly, events like suspend and resume aren't reported. This is an
obstacle to achieving high availability in a system where VMs are
controlling devices directly.

From previous discussion, it's understood that VFIO is a great solution
for KVM device assignment and ideally this work should be part of it.
Unfortunately, a solution is needed until VFIO gets more mature.

The first step at reporting events and errors all the way up to the guest
kernel is to provide a mechanism for the host kernel to notify userspace.
This patch proposes a solution based on pci-stub and UIO. Other solutions
exist, but this one was chosen for its simplicity and compatibility with
the current model.

All comments are welcome.
Warning: Minimal testing.

thanks,
-Etienne

Signed-off-by: Etienne Martineau <etmartin@xxxxxxxxx>
---
 drivers/uio/Kconfig          |   11 ++
 drivers/uio/Makefile         |    1 +
 drivers/uio/uio_pci_stub.c   |  359 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/Kbuild         |    1 +
 include/linux/uio_pci_stub.h |   31 ++++
 5 files changed, 403 insertions(+), 0 deletions(-)
 create mode 100644 drivers/uio/uio_pci_stub.c
 create mode 100644 include/linux/uio_pci_stub.h

diff --git a/drivers/uio/Kconfig b/drivers/uio/Kconfig
index bb44079..e4af9d4 100644
--- a/drivers/uio/Kconfig
+++ b/drivers/uio/Kconfig
@@ -94,4 +94,15 @@ config UIO_NETX
 	  To compile this driver as a module, choose M here; the module
 	  will be called uio_netx.
 
+config UIO_PCI_STUB
+	tristate "Simple stub driver with AER capabilities"
+	depends on PCI
+	help
+	  Say Y or M here if you want to be able to reserve a PCI device
+	  when it is going to be assigned to a guest operating system.
+	  Also, this driver gives you the option to notify the guest
+	  operating system when the device reports a PCI error.
+
+	  When in doubt, say N.
+
 endif
diff --git a/drivers/uio/Makefile b/drivers/uio/Makefile
index 18fd818..c1eeedc 100644
--- a/drivers/uio/Makefile
+++ b/drivers/uio/Makefile
@@ -6,3 +6,4 @@ obj-$(CONFIG_UIO_AEC)	+= uio_aec.o
 obj-$(CONFIG_UIO_SERCOS3)	+= uio_sercos3.o
 obj-$(CONFIG_UIO_PCI_GENERIC)	+= uio_pci_generic.o
 obj-$(CONFIG_UIO_NETX)	+= uio_netx.o
+obj-$(CONFIG_UIO_PCI_STUB)	+= uio_pci_stub.o
diff --git a/drivers/uio/uio_pci_stub.c b/drivers/uio/uio_pci_stub.c
new file mode 100644
index 0000000..18fadcb
--- /dev/null
+++ b/drivers/uio/uio_pci_stub.c
@@ -0,0 +1,359 @@
+/*
+ * uio_pci_stub.c - Simple stub driver with AER capabilities
+ *
+ * Copyright (C) 2010 Cisco Systems
+ * Author: Etienne Martineau <etmartin@xxxxxxxxx>
+ *
+ * Based on drivers/pci/pci-stub.c by Chris Wright,
+ * Copyright (C) 2008 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Usage is simple: allocate a new id to the uio_pci_stub driver and bind the
+ * device to it.
+ *
+ * Since the driver does not declare any device ids, you must allocate
+ * id and bind the device to the driver yourself.  For example:
+ *
+ * # echo "8086 10f5" > /sys/bus/pci/drivers/uio_pci_stub/new_id
+ * # echo -n 0000:00:19.0 > /sys/bus/pci/drivers/e1000e/unbind
+ * # echo -n 0000:00:19.0 > /sys/bus/pci/drivers/uio_pci_stub/bind
+ * # ls -l /sys/bus/pci/devices/0000:00:19.0/driver
+ * .../0000:00:19.0/driver -> ../../../bus/pci/drivers/uio_pci_stub
+ *
+ * uio_pci_stub is equivalent to pci-stub when no extra parameter is 
+ * given to the module at load time. 'aer=1' will turn on PCIe AER error 
+ * reporting. 
+ *
+ * NOTE: There is no support for suspend and resume and current implementation
+ * is not based on eventfd.
+ */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/uio_driver.h>
+#include <linux/uio_pci_stub.h>
+
+static int debug=0;	/* non-zero enables DPRINTK() output */
+static int aer=0;	/* non-zero makes init() install the PCI error handlers */
+static char ids[1024] __initdata;	/* comma separated id list parsed by init() */
+
+#define DRIVER_VERSION	"0.01"
+#define DRIVER_AUTHOR	"Etienne Martineau <etmartin@xxxxxxxxx>"
+#define DRIVER_DESC	"Simple stub driver with AER capabilities"
+MODULE_PARM_DESC(ids, "Initial PCI IDs to add to the stub driver, format is "
+		 "\"vendor:device[:subvendor[:subdevice[:class[:class_mask]]]]\""
+		 " and multiple comma separated entries can be specified");
+module_param_string(ids, ids, sizeof(ids), 0);
+MODULE_PARM_DESC(debug, "Debugging mode enabled or not");
+module_param(debug, bool, 0644);	/* NOTE(review): 'debug' is declared int but registered as bool - confirm the target kernel accepts this */
+MODULE_PARM_DESC(aer, "AER error reporting enabled or not");
+module_param(aer, bool, 0644);	/* NOTE(review): same int-vs-bool mismatch as 'debug'; 'aer' is only read once, in init() */
+
+/* Debug trace prefixed with the calling function's name; silent unless 'debug' is set */
+#define DPRINTK(fmt, args...)						\
+	do {								\
+		if (debug)						\
+			printk(KERN_DEBUG "%s: " fmt, __func__ , ## args); \
+	} while (0)
+
+struct uio_pci_stub_priv {
+	atomic_t sync;			/* handshake counter: notify_user() arms it with -1, each side bumps it with atomic_inc_and_test() */
+	pci_ers_result_t result;	/* verdict stored by uio_pci_stub_control() and returned by notify_user() */
+	struct semaphore sem;		/* created locked in probe(); posted by uio_pci_stub_control(), waited on by notify_user() */
+	char name[UIO_MAX_NAME_SIZE];	/* backing storage for uio_info->name, formatted in probe() */
+};
+
+/*
+ * uio_pci_stub_control - UIO irqcontrol hook used by userspace to answer a
+ * PCI error notification.
+ *
+ * For every pci error handler invoked, userspace is notified. It has
+ * access to the pci error code through 'logical BAR0'.
+ *
+ * After each notification, the kernel waits for userspace to provide
+ * the pci error result. Upon timeout, the kernel takes the default action.
+ *
+ * Most if not all UIO drivers use 'value' to control the state of
+ * an interrupt in the interrupt controller. Here, 'value' transports
+ * the pci error result.
+ *
+ * Returns 0 when the result was accepted, -EINVAL when 'value' is not a
+ * known enum pci_error_result, and -EPIPE when userspace is out of sync.
+ */
+static int uio_pci_stub_control(struct uio_info *info, s32 value)
+{
+	struct uio_pci_stub_priv *priv = info->priv;
+	pci_ers_result_t verdict;
+
+	/* Sanity check: translate the userspace code, rejecting unknown ones */
+	switch ((enum pci_error_result)value) {
+	case RESULT_NONE:
+		verdict = PCI_ERS_RESULT_NONE;
+		break;
+	case RESULT_CAN_RECOVER:
+		verdict = PCI_ERS_RESULT_CAN_RECOVER;
+		break;
+	case RESULT_NEED_RESET:
+		verdict = PCI_ERS_RESULT_NEED_RESET;
+		break;
+	case RESULT_DISCONNECT:
+		verdict = PCI_ERS_RESULT_DISCONNECT;
+		break;
+	case RESULT_RECOVERED:
+		verdict = PCI_ERS_RESULT_RECOVERED;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* Userspace is out of sync unless this bump lands exactly on zero */
+	if (!atomic_inc_and_test(&priv->sync))
+		return -EPIPE;
+
+	priv->result = verdict;
+	up(&priv->sem);
+	return 0;
+}
+
+/*
+ * logical_bar_setup - back UIO memory region @n with one kernel page
+ * @info: UIO device description being filled in
+ * @n: index into info->mem[]
+ *
+ * The page is zeroed because userspace can map it, and a freshly allocated
+ * page may still hold stale kernel data.
+ *
+ * For UIO_MEM_LOGICAL the UIO core resolves the page with virt_to_page() on
+ * mem[].addr, so the field must carry the kernel virtual address, not the
+ * physical one.
+ *
+ * Returns 0 on success or -ENOMEM when no page could be allocated.
+ */
+static int logical_bar_setup(struct uio_info *info, int n)
+{
+	void *ptr;
+
+	ptr = (void *)get_zeroed_page(GFP_KERNEL);
+	if (!ptr)
+		return -ENOMEM;
+
+	info->mem[n].addr = (unsigned long)ptr;
+	info->mem[n].size = PAGE_SIZE;
+	info->mem[n].memtype = UIO_MEM_LOGICAL;
+	info->mem[n].internal_addr = ptr;
+	return 0;
+}
+
+/* Give back the page behind UIO memory region @n, if one was allocated */
+static void logical_bar_release(struct uio_info *info, int n)
+{
+	void *page = info->mem[n].internal_addr;
+
+	if (!page)
+		return;
+
+	free_pages((long unsigned int)page, 0);
+}
+
+/*
+ * probe - claim a PCI device and expose it as a UIO device
+ * @dev: PCI device being bound
+ * @id: id entry that matched; its vendor/device go into the UIO name
+ *
+ * Allocates the uio_info and private state, sets up logical BAR0 and
+ * registers the UIO device with a custom (software-triggered) interrupt.
+ *
+ * Returns 0 on success or a negative errno. On failure everything allocated
+ * here is released and the driver data pointer is cleared again.
+ */
+static int __devinit probe(struct pci_dev *dev,
+			   const struct pci_device_id *id)
+{
+	int ret;
+	struct uio_info *info;
+	struct uio_pci_stub_priv *priv;
+
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return -ENOMEM;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv) {
+		ret = -ENOMEM;
+		goto free_info;
+	}
+
+	ret = logical_bar_setup(info, 0);
+	if (ret)
+		goto free_priv;
+
+	info->priv = priv;
+	info->version = DRIVER_VERSION;
+	info->irqcontrol = uio_pci_stub_control;
+	info->irq = UIO_IRQ_CUSTOM;
+
+	snprintf(priv->name, UIO_MAX_NAME_SIZE,
+		FORMAT_UIO_DEV_NAME(dev->bus->number, PCI_SLOT(dev->devfn),
+		PCI_FUNC(dev->devfn), id->vendor, id->device));
+	info->name = priv->name;
+
+	/* The semaphore starts locked: notify_user() sleeps until a reply */
+	init_MUTEX_LOCKED(&priv->sem);
+	atomic_set(&priv->sync, 0);
+	pci_set_drvdata(dev, info);
+
+	ret = uio_register_device(&dev->dev, info);
+	if (ret)
+		goto release_bar;
+
+	dev_printk(KERN_INFO, &dev->dev, "claimed by uio_pci_stub\n");
+	return 0;
+
+release_bar:
+	/* Do not leave drvdata pointing at memory that is about to be freed */
+	pci_set_drvdata(dev, NULL);
+	logical_bar_release(info, 0);
+free_priv:
+	kfree(priv);
+free_info:
+	kfree(info);
+	return ret;
+}
+
+/* Undo probe(): unregister the UIO device, then free everything it owned */
+static void remove(struct pci_dev *dev)
+{
+	struct uio_info *uio = pci_get_drvdata(dev);
+	struct uio_pci_stub_priv *priv;
+
+	uio_unregister_device(uio);
+	pci_set_drvdata(dev, NULL);
+
+	logical_bar_release(uio, 0);
+
+	priv = uio->priv;
+	kfree(priv);
+	kfree(uio);
+}
+
+/* ------------------ PCI Error Recovery infrastructure  -------------- */
+/*
+ * notify_user - report a PCI error-recovery step to userspace
+ * @err_code: recovery step being reported
+ * @pdev: device the step applies to
+ *
+ * The step is published in logical BAR0 and the UIO event is signalled.
+ * For every step except RESUME the caller then sleeps until userspace
+ * answers through uio_pci_stub_control() or the timeout expires.
+ *
+ * Returns the pci_ers_result_t posted by userspace, PCI_ERS_RESULT_NONE on
+ * timeout, or 0 for RESUME (whose caller ignores the value).
+ */
+static int notify_user(enum pci_error_code err_code, struct pci_dev *pdev)
+{
+	int err;
+	struct uio_info *info = pci_get_drvdata(pdev);
+	struct uio_pci_stub_priv *priv = info->priv;
+	struct uio_pci_stub_logical_bar *bar = info->mem[0].internal_addr;
+
+	DPRINTK("AER error code %d\n", err_code);
+
+	if (err_code == RESUME) {	/* No reply expected */
+		bar->err_code = err_code;
+		uio_event_notify(info);
+		return 0;
+	}
+
+	/* Arm the handshake: the first reply moves sync from -1 to 0 */
+	atomic_set(&priv->sync, -1);
+	bar->err_code = err_code;
+	uio_event_notify(info);
+
+	/* Wait till userspace posts on the semaphore. Arbitrary timeout... */
+	err = down_timeout(&priv->sem, msecs_to_jiffies(50));
+	if (!err) {
+		DPRINTK("AER result code %d\n", priv->result);
+		return priv->result;
+	}
+
+	/*
+	 * Timed out. Bump sync so that a late reply is rejected. If the bump
+	 * does not land on zero, userspace replied between the timeout and
+	 * here; consume its post so it cannot satisfy the next wait.
+	 */
+	if (!atomic_inc_and_test(&priv->sync))
+		down(&priv->sem);
+
+	printk(KERN_INFO "AER userspace not responding\n");
+	return PCI_ERS_RESULT_NONE;
+}
+
+/**
+ * error_detected - called when PCI error is detected.
+ * @pdev: Pointer to PCI device
+ * @state: The current pci connection state (not used by this handler)
+ *
+ * Reports ERROR_DETECTED to userspace and returns what notify_user() yields.
+ */
+static pci_ers_result_t error_detected(struct pci_dev *pdev, pci_channel_state_t state)
+{
+	return notify_user(ERROR_DETECTED, pdev);
+}
+
+/**
+ * mmio_enabled - MMIO has been re-enabled, but not DMA
+ * @pdev: Pointer to PCI device
+ *
+ * Reports MMIO_ENABLED to userspace and returns what notify_user() yields.
+ */
+static pci_ers_result_t mmio_enabled(struct pci_dev *pdev)
+{
+	return notify_user(MMIO_ENABLED, pdev);
+}
+
+/**
+ * link_reset - PCI Express link has been reset
+ * @pdev: Pointer to PCI device
+ *
+ * Reports LINK_RESET to userspace and returns what notify_user() yields.
+ */
+static pci_ers_result_t link_reset(struct pci_dev *pdev)
+{
+	return notify_user(LINK_RESET, pdev);
+}
+
+/**
+ * slot_reset - called after the pci bus has been reset.
+ * @pdev: Pointer to PCI device
+ *
+ * This handler does not touch the device itself; it only reports SLOT_RESET
+ * to userspace and returns what notify_user() yields.
+ */
+static pci_ers_result_t slot_reset(struct pci_dev *pdev)
+{
+	return notify_user(SLOT_RESET, pdev);
+}
+
+/**
+ * resume - resume normal operations
+ * @pdev: Pointer to PCI device
+ *
+ * Resume normal operations after an error recovery
+ * sequence has been completed. Userspace is told about RESUME but no
+ * reply is awaited, and notify_user()'s return value is discarded.
+ */
+static void resume(struct pci_dev *pdev)
+{
+	notify_user(RESUME, pdev);
+}
+
+/* PCI error recovery callbacks; each one relays its step to userspace via notify_user() */
+static struct pci_error_handlers err_handler = {
+	.error_detected = error_detected,
+	.mmio_enabled = mmio_enabled,
+	.link_reset = link_reset,
+	.slot_reset = slot_reset,
+	.resume = resume,
+};
+
+/* PCI driver description; .err_handler is not set here, init() installs it when 'aer' is set */
+static struct pci_driver driver = {
+	.name = "uio_pci_stub",
+	.id_table = NULL, /* only dynamic id's */
+	.probe = probe,
+	.remove = remove,
+};
+
+/*
+ * init - module entry point
+ *
+ * Installs the error handlers when 'aer' is set, registers the PCI driver,
+ * then adds every id listed in the 'ids' module parameter as a dynamic id.
+ * A malformed or rejected id is logged and skipped; only a failure to
+ * register the driver itself is returned as an error.
+ */
+static int __init init(void)
+{
+	char *p, *id;
+	int rc;
+
+	pr_info(DRIVER_DESC " %s" " version: " DRIVER_VERSION "\n",
+		aer?"Turned on":"Turned off");
+
+	if(aer)
+		driver.err_handler = &err_handler;
+
+	rc = pci_register_driver(&driver);
+	if (rc)
+		return rc;
+
+	/*
+	 * No ids passed: strsep() would still hand back one empty token and
+	 * trigger a bogus "invalid id string" warning, so stop here.
+	 */
+	if (ids[0] == '\0')
+		return 0;
+
+	/* add ids specified in the module parameter */
+	p = ids;
+	while ((id = strsep(&p, ","))) {
+		unsigned int vendor, device, subvendor = PCI_ANY_ID,
+			subdevice = PCI_ANY_ID, class=0, class_mask=0;
+		int fields;
+
+		fields = sscanf(id, "%x:%x:%x:%x:%x:%x",
+				&vendor, &device, &subvendor, &subdevice,
+				&class, &class_mask);
+
+		/* vendor and device are mandatory, the rest keep their defaults */
+		if (fields < 2) {
+			printk(KERN_WARNING
+			       "uio_pci_stub: invalid id string \"%s\"\n", id);
+			continue;
+		}
+
+		printk(KERN_INFO
+		       "uio_pci_stub: add %04X:%04X sub=%04X:%04X cls=%08X/%08X\n",
+		       vendor, device, subvendor, subdevice, class, class_mask);
+
+		rc = pci_add_dynid(&driver, vendor, device,
+				   subvendor, subdevice, class, class_mask, 0);
+		if (rc)
+			printk(KERN_WARNING
+			       "uio_pci_stub: failed to add dynamic id (%d)\n", rc);
+	}
+
+	return 0;
+}
+
+/* Module exit: unregister the PCI driver registered by init() */
+static void __exit cleanup(void)
+{
+	pci_unregister_driver(&driver);
+}
+
+module_init(init);	/* init() runs at module load */
+module_exit(cleanup);	/* cleanup() runs at module unload */
+
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 831c463..045a5de 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -362,6 +362,7 @@ header-y += udf_fs_i.h
 header-y += udp.h
 header-y += uinput.h
 header-y += uio.h
+header-y += uio_pci_stub.h
 header-y += ultrasound.h
 header-y += un.h
 header-y += unistd.h
diff --git a/include/linux/uio_pci_stub.h b/include/linux/uio_pci_stub.h
new file mode 100644
index 0000000..873c407
--- /dev/null
+++ b/include/linux/uio_pci_stub.h
@@ -0,0 +1,31 @@
+#ifndef __LINUX_UIO_PCI_STUB_H
+#define __LINUX_UIO_PCI_STUB_H
+
+#ifndef UIO_MAX_NAME_SIZE	/* fallback, only used when no earlier definition is visible */
+#define UIO_MAX_NAME_SIZE 64
+#endif
+
+/*
+ * Expands to a printf-style format string followed by its five arguments,
+ * producing "bus:dev.fcn  vendor:device" in hexadecimal. The parameter
+ * names follow the order in which the values are printed.
+ */
+#define FORMAT_UIO_DEV_NAME(busnr,dev,fcn,vendorid,deviceid)\
+	"%x:%x.%x  %x:%x",busnr,dev,fcn,vendorid,deviceid
+
+/* Recovery step published to userspace in struct uio_pci_stub_logical_bar */
+enum pci_error_code{
+	ERROR_DETECTED,	/* sent by the error_detected handler */
+	MMIO_ENABLED,	/* sent by the mmio_enabled handler */
+	LINK_RESET,	/* sent by the link_reset handler */
+	SLOT_RESET,	/* sent by the slot_reset handler */
+	RESUME,		/* sent by the resume handler; no reply is awaited */
+};
+
+/* Reply codes accepted from userspace; the driver maps each one to PCI_ERS_RESULT_<same suffix> */
+enum pci_error_result{
+	RESULT_NONE,
+	RESULT_CAN_RECOVER,
+	RESULT_NEED_RESET,
+	RESULT_DISCONNECT,
+	RESULT_RECOVERED,
+};
+
+/* Layout of the page exposed as logical BAR0 (UIO memory region 0) */
+struct uio_pci_stub_logical_bar {
+	enum pci_error_code err_code;	/* written by the driver before each UIO event */
+};
+
+#endif
-- 
1.7.0.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux