[RFD][PATCH] pcielw An alternate pcie hotplug driver

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



What follows below is my alternate pcie hotplug driver.
It is very stupid, very simple and very robust.

This driver should work on any pcie hotplug bridge that
only has support for the interrupt when the pcie link
comes or down, and that sets the hotplug and the hotplug
surprise bits.

I wrote this because in my environment the pciehp driver
totally fails and 500 lines of code are much easier to
debug than 3000.

Now that I have the code working I'm looking for the best
path to get a driver I can use into the mainstream kernel.

What I would like to do is:
- Generalize a bit more and move pcielw_verify_device
  and pcielw_configure_device into the pci core.

  They aren't doing anything that is specific to pcie hotplug
  at this point.

- Create a generic pci workqueue that that all hotplug scanning goes
  through.

  The pci addition scanning and removal code is not thread safe and
  you can get some pretty horrible things happening if you have if you
  happen to add or remove a pci device that is a hotplug controller.

  Currently that is all too possible with the fakephp, as well as my
  weird case that actually has nested pcie hotplug controllers.

  Adding one official workqueue looks much saner in the short term
  than sorting out and fixing the locking issues as all of the hotplug
  work happens on workqueues anyway.  We just need a generic to filter
  all of the work through.

- Modify most of the other pci hotplug drivers to use generic logic
  instead of rolling these kinds of functions themselves.

- Modifying the pciehp driver so that on hardware without any fancy
  buttons or weird firmware support the pciehp driver devolves into
  essentially my current pcielw driver.

As long as my patches don't get stuck in conversations on what is the
perfect color to paint a shed and can get patches that make things
better merged I should have enough time to do that work.

On the other hand if there are big reservations about what I am
proposing or the process is slow.  I would like to just cleanup
and merge my pcielw driver (below) and maintain that.

Signed-off-by: "Eric W. Biederman" <ebiederm@xxxxxxxxxxxx>

---
 drivers/pci/hotplug/Makefile |    1 +
 drivers/pci/hotplug/pcielw.c |  452 ++++++++++++++++++++++++++++++++++++++++++
 drivers/pci/pcie/Kconfig     |   12 ++
 3 files changed, 465 insertions(+), 0 deletions(-)

diff --git a/drivers/pci/hotplug/Makefile b/drivers/pci/hotplug/Makefile
index 2aa117c..219b055 100644
--- a/drivers/pci/hotplug/Makefile
+++ b/drivers/pci/hotplug/Makefile
@@ -2,6 +2,7 @@
 # Makefile for the Linux kernel pci hotplug controller drivers.
 #
 
+obj-$(CONFIG_HOTPLUG_PCI_PCIELW)	+= pcielw.o
 obj-$(CONFIG_HOTPLUG_PCI)		+= pci_hotplug.o
 obj-$(CONFIG_HOTPLUG_PCI_COMPAQ)	+= cpqphp.o
 obj-$(CONFIG_HOTPLUG_PCI_IBM)		+= ibmphp.o
diff --git a/drivers/pci/hotplug/pcielw.c b/drivers/pci/hotplug/pcielw.c
new file mode 100644
index 0000000..a3af0d4
--- /dev/null
+++ b/drivers/pci/hotplug/pcielw.c
@@ -0,0 +1,452 @@
+/*
+ * PCI Express link watch driver
+ * 
+ * Copyright (C) 2009 AristaNetworks GPLv2
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/pcieport_if.h>
+#include <linux/interrupt.h>
+#include <linux/wait.h>
+#include <linux/kthread.h>
+#include <linux/workqueue.h>
+
+#define NAME "pcielw"
+
+struct slot_cmd_state {
+	int			always_available;
+	spinlock_t		lock;
+	wait_queue_head_t	queue;
+	struct timer_list	timer;
+};
+
+struct pcielw_device {
+	struct pcie_device 	*pcie;
+	unsigned 		cap_base;
+	unsigned 		slot_capabilities;
+	struct slot_cmd_state 	cmd;
+	struct work_struct	work;
+};
+
+static struct workqueue_struct *pcielw_wq;
+
+static int pcielw_read_configw(struct pcielw_device *dev, int reg, u16 *value)
+{
+	struct pci_dev *pdev = dev->pcie->port;
+	return pci_read_config_word(pdev, dev->cap_base + reg, value);
+}
+
+static int pcielw_read_configl(struct pcielw_device *dev, int reg, u32 *value)
+{
+	struct pci_dev *pdev = dev->pcie->port;
+	return pci_read_config_dword(pdev, dev->cap_base + reg, value);
+}
+
+static int pcielw_write_configw(struct pcielw_device *dev, int reg, u16 value)
+{
+	struct pci_dev *pdev = dev->pcie->port;
+	return pci_write_config_word(pdev, dev->cap_base + reg, value);
+}
+
+static int pcielw_write_configl(struct pcielw_device *dev, int reg, u32 value)
+{
+	struct pci_dev *pdev = dev->pcie->port;
+	return pci_write_config_dword(pdev, dev->cap_base + reg, value);
+}
+
+static int pcielw_setup_io(struct pcielw_device *dev)
+{
+	int ret = 0;
+
+	dev->cap_base = pci_find_capability(dev->pcie->port, PCI_CAP_ID_EXP);
+	if (!dev->cap_base)  {
+		dev_err(&dev->pcie->device, "Cannot find PCI Express capability\n");
+		ret = -ENOENT;
+	}
+	return ret;
+}
+
+static void pcielw_cmd_available_timeout(unsigned long data)
+{
+	struct pcielw_device *dev = (struct pcielw_device *)data;
+
+	wake_up(&dev->cmd.queue);
+}
+
+static int pcielw_set_cmd(struct pcielw_device *dev, u16 cmd, u16 mask)
+{
+	DEFINE_WAIT(wait);
+	u16 slot_ctrl;
+	int ret;
+
+	/* Wait until the slot cmd register is available to be written */
+	spin_lock_irq(&dev->cmd.lock);
+	for (;;) {
+		prepare_to_wait(&dev->cmd.queue, &wait, TASK_UNINTERRUPTIBLE);
+		if (!timer_pending(&dev->cmd.timer))
+			break;
+
+		spin_unlock_irq(&dev->cmd.lock);
+		schedule();
+		spin_lock_irq(&dev->cmd.lock);
+	}
+	finish_wait(&dev->cmd.queue, &wait);
+
+	/* read and modify the slot control register */
+	ret = pcielw_read_configw(dev, PCI_EXP_SLTCTL, &slot_ctrl);
+	if (ret)
+		goto out;
+
+	slot_ctrl &= ~mask;
+	slot_ctrl |= (cmd & mask);
+
+	/* Start the timer in case we don't get an interrupt */
+	if (!dev->cmd.always_available) {
+		dev->cmd.timer.function = pcielw_cmd_available_timeout;
+		dev->cmd.timer.data = (unsigned long)dev;
+		dev->cmd.timer.expires = jiffies + HZ;
+		add_timer(&dev->cmd.timer);
+	}
+
+	/* Finally write the new value */
+	ret = pcielw_write_configw(dev, PCI_EXP_SLTCTL, slot_ctrl);
+	if (ret)
+		goto out;
+out:
+	spin_unlock_irq(&dev->cmd.lock);
+	return ret;
+}
+
+#define SLOT_EVENT_MASK ( \
+	PCI_EXP_SLTSTA_ABP | PCI_EXP_SLTSTA_PFD | PCI_EXP_SLTSTA_MRLSC | \
+	PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_CC | PCI_EXP_SLTSTA_DLLSC )
+
+static void pcielw_setup_slot_cmd(struct pcielw_device *dev)
+{
+	/* Initialize the state for sending slot commands */
+	spin_lock_init(&dev->cmd.lock);
+	init_waitqueue_head(&dev->cmd.queue);
+	init_timer(&dev->cmd.timer);
+	dev->cmd.always_available =
+		!!(dev->slot_capabilities & PCI_EXP_SLTCAP_NCCS);
+
+	/* Clear pending slot status bits */
+	pcielw_write_configw(dev, PCI_EXP_SLTSTA, SLOT_EVENT_MASK);
+}
+
+#define NOTIFICATION_MASK ( \
+	PCI_EXP_SLTCTL_ABPE | PCI_EXP_SLTCTL_PFDE | \
+	PCI_EXP_SLTCTL_MRLSCE | PCI_EXP_SLTCTL_PDCE | \
+	PCI_EXP_SLTCTL_CCIE | PCI_EXP_SLTCTL_HPIE | \
+	PCI_EXP_SLTCTL_DLLSCE \
+)
+
+static int pcielw_enable_notification(struct pcielw_device *dev)
+{
+	u16 cmd, mask;
+
+	/* FIXME filter by what notifications the hardware supports!! */
+	cmd =  PCI_EXP_SLTCTL_HPIE | PCI_EXP_SLTCTL_CCIE | PCI_EXP_SLTCTL_DLLSCE;
+	
+	mask = (PCI_EXP_SLTCTL_ABPE | PCI_EXP_SLTCTL_PFDE |
+		PCI_EXP_SLTCTL_MRLSCE | PCI_EXP_SLTCTL_PDCE |
+		PCI_EXP_SLTCTL_CCIE | PCI_EXP_SLTCTL_HPIE |
+		PCI_EXP_SLTCTL_DLLSCE);
+
+	return pcielw_set_cmd(dev, cmd, NOTIFICATION_MASK);
+}
+
+static int pcielw_disable_notification(struct pcielw_device *dev)
+{
+	return pcielw_set_cmd(dev, 0, NOTIFICATION_MASK);
+	/* FIXME should I wait for this to take effect?? */
+}
+
+static int pcielw_verify_device(struct pci_dev *dev)
+{
+	/* test to see if the preexisting configuraiton appears valid */
+	u32 id;
+	u8 hdr_type;
+	u32 class;
+	u16 cmd;
+
+	/* If I can not read or validate the vendor and device id
+	 * the preexisting configuration is invalid.
+	 */
+	if (pci_read_config_dword(dev, PCI_VENDOR_ID, &id))
+		return 0;
+	if (id != ((dev->device << 16) | dev->vendor))
+		return 0;
+
+	/* Verify the header type */
+	if (pci_read_config_byte(dev, PCI_HEADER_TYPE, &hdr_type))
+		return 0;
+	if (dev->hdr_type != (hdr_type & 0x7f))
+		return 0;
+
+	/* Verify the class */
+	if (pci_read_config_dword(dev, PCI_CLASS_REVISION, &class))
+		return 0;
+	if (class != ((dev->class << 8) | dev->revision))
+		return 0;
+
+	/* Verify the device has been enabled.
+	 * cmd should be cleared on reset
+	 */
+	if (pci_read_config_word(dev, PCI_COMMAND, &cmd))
+		return 0;
+	if (cmd == 0)
+		return 0;
+
+	/* Verify my bridge has the expected bus numbers */
+	if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
+		u32 busses, expected_busses;
+
+		if (pci_read_config_dword(dev, PCI_PRIMARY_BUS, &busses))
+			return 0;
+
+		expected_busses = dev->subordinate->primary;
+		expected_busses |= dev->subordinate->secondary << 8;
+		expected_busses |= dev->subordinate->subordinate << 16;
+
+		if ((busses & 0xffffff) != expected_busses)
+			return 0;
+	}
+
+	/* 
+	 * Enough.  A working device should have had to change one of
+	 * the registers I have checked, since power up.
+	 */
+	return 1;
+}
+
+static int pcielw_configure_device(struct pcielw_device *dev)
+{
+	struct pci_bus *bus = dev->pcie->port->subordinate;
+	struct pci_dev *pdev;
+	int max;
+	int pass;
+	int num;
+	int ret = 0;
+
+	/* Verify the state of the bus */
+	if (unlikely(!list_empty(&bus->devices))) {
+		int verified = 1;
+		list_for_each_entry(pdev, &bus->devices, bus_list) {
+			if (!pcielw_verify_device(pdev)) {
+				dev_info(&dev->pcie->device, "Device %s already exists removing.\n",
+					pci_name(pdev));
+				verified = 0;
+				break;
+			}
+		}
+		if (verified) {
+			dev_info(&dev->pcie->device, "Devices already exist and appears valid.\n");
+			goto out;
+		} else 
+			pci_remove_behind_bridge(dev->pcie->port);
+	}
+
+	/* This is a scan of an empty bus so there are no child busses yet */
+	max = bus->secondary;
+
+	num = pci_scan_slot(bus, PCI_DEVFN(0, 0));
+	if (num == 0) {
+		dev_info(&dev->pcie->device, "No devices found\n");
+		ret = -ENODEV;
+		goto out;
+	}
+
+	for (pass = 0; pass < 2; pass++) {
+		list_for_each_entry(pdev, &bus->devices, bus_list) {
+			if ((pdev->hdr_type == PCI_HEADER_TYPE_BRIDGE) ||
+			    (pdev->hdr_type == PCI_HEADER_TYPE_CARDBUS)) {
+				max = pci_scan_bridge(bus, pdev, max, pass);
+				if (pass && pdev->subordinate)
+					pci_bus_size_bridges(pdev->subordinate);
+			}
+		}
+	}
+	pci_bus_assign_resources(bus);
+	pci_enable_bridges(bus);
+	pci_bus_add_devices(bus);
+out:
+	return ret;
+}
+
+static void pcielw_handle_datalink_change(struct pcielw_device *dev)
+{
+	const char *status;
+	u16 link_status;
+
+	if (pcielw_read_configw(dev, PCI_EXP_LNKSTA, &link_status))
+		return;
+
+	status = (link_status & PCI_EXP_LNKSTA_DLLLA) ? "up": "down"; 
+
+	dev_info(&dev->pcie->device, "link %s\n", status);
+	if (link_status & PCI_EXP_LNKSTA_DLLLA) {
+		mdelay(100);
+		/* It should now be safe to read/write config space registers */
+		//pci_read_config_....();
+		/* FIXME handle the weirdness that could cause this to become
+		 * a one second delay.
+		 */
+		pcielw_configure_device(dev);
+	}
+	else {
+		pci_remove_behind_bridge(dev->pcie->port);
+	}
+	dev_info(&dev->pcie->device, "link %s processing complete\n", status);
+}
+
+static void pcielw_datalink_change_work(struct work_struct *work)
+{
+	struct pcielw_device *dev =
+		container_of(work, struct pcielw_device, work);
+	pcielw_handle_datalink_change(dev);
+}
+
+static irqreturn_t pcielw_isr(int irq, void *dev_id)
+{
+	struct pcielw_device *dev = dev_id;
+	u16 slot_status, slot_event;
+
+	if (pcielw_read_configw(dev, PCI_EXP_SLTSTA, &slot_status))
+		return IRQ_NONE;
+
+	/* Hack to detect an irq arriving after a card has been removed */
+	if (slot_status == 0xffff)
+		return IRQ_NONE;
+
+	slot_event = slot_status & SLOT_EVENT_MASK;
+	if (!slot_event)
+		return IRQ_NONE;
+
+	/* Clear the events that we are handling */
+	pcielw_write_configw(dev, PCI_EXP_SLTSTA, slot_event);
+	
+	/* Note that it is safe to send the controller another command */
+	if (slot_event & PCI_EXP_SLTSTA_CC) {
+		if (del_timer(&dev->cmd.timer))
+			wake_up(&dev->cmd.queue);
+	}
+	
+	if (slot_event & PCI_EXP_SLTSTA_DLLSC) {
+		queue_work(pcielw_wq, &dev->work);
+	}
+	
+	slot_event &= ~PCI_EXP_SLTSTA_CC;
+	slot_event &= ~PCI_EXP_SLTSTA_DLLSC;
+	if (slot_event)
+		dev_info(&dev->pcie->device, "unhandled event %x\n",
+			slot_event);
+
+	return IRQ_HANDLED;
+}
+
+
+static int pcielw_probe(struct pcie_device *pcie)
+{
+	struct pcielw_device *dev;
+	int ret;
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	/* Initialize the pci express device state */
+	dev->pcie = pcie;
+	ret = pcielw_setup_io(dev);
+	if (ret)
+		goto err_kfree;
+
+	/* Cache the device capabilities */
+	ret = pcielw_read_configl(dev, PCI_EXP_SLTCAP, &dev->slot_capabilities);
+	if (ret)
+		goto err_kfree;
+
+	/* Setup the state for sending slot commands */
+	pcielw_setup_slot_cmd(dev); 
+
+	INIT_WORK(&dev->work, pcielw_datalink_change_work);
+
+	/* Setup to receive interrupts */
+	ret = request_irq(pcie->irq, pcielw_isr, IRQF_SHARED, NAME, dev);
+	if (ret) {
+		dev_err(&dev->pcie->device, "request_irq_failed: %d\n", ret);
+		goto err_stop;
+	}
+
+	/* Now that interrupts are registered tell the device to generate them */
+	ret = pcielw_enable_notification(dev);
+	if (ret)
+		goto err_stop;
+
+	set_service_data(pcie, dev);
+
+	/* Fake a link change event to ensure devices that were added or removed
+	 * before the driver was loaded are found.
+	 */
+	queue_work(pcielw_wq, &dev->work);
+	return 0;
+err_stop:
+	cancel_work_sync(&dev->work);
+err_kfree:
+	kfree(dev);
+err:
+	return ret;
+}
+
+static void pcielw_remove(struct pcie_device *pcie)
+{
+	struct pcielw_device *dev = get_service_data(pcie);
+
+	pcielw_disable_notification(dev);
+	/* FIXME what of threads waiting for the cmd queue to become available? */
+	free_irq(dev->pcie->irq, dev);
+	del_timer_sync(&dev->cmd.timer);
+	cancel_work_sync(&dev->work);
+	kfree(dev);
+}
+
+static struct pcie_port_service_driver lwdriver_portdrv = {
+	.name		= NAME,
+	.port_type	= PCIE_ANY_PORT,
+	.service	= PCIE_PORT_SERVICE_HP,
+
+	.probe		= pcielw_probe,
+	.remove		= pcielw_remove,
+};
+
+static int __init pcielw_init(void)
+{
+	int ret;
+
+	pcielw_wq = create_singlethread_workqueue("pcielwd");
+	if (!pcielw_wq)
+		return -ENOMEM;
+
+	ret = pcie_port_service_register(&lwdriver_portdrv);
+	if (ret < 0)
+		destroy_workqueue(pcielw_wq);
+
+	return ret;
+}
+
+static void __exit pcielw_cleanup(void)
+{
+	pcie_port_service_unregister(&lwdriver_portdrv);
+	destroy_workqueue(pcielw_wq);
+}
+
+MODULE_AUTHOR("Eric Biederman <ebiederm@xxxxxxxxxxxx>");
+MODULE_LICENSE("GPL");
+
+module_init(pcielw_init);
+module_exit(pcielw_cleanup);
diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig
index 5a0c6ad..6fa40ed 100644
--- a/drivers/pci/pcie/Kconfig
+++ b/drivers/pci/pcie/Kconfig
@@ -25,6 +25,18 @@ config HOTPLUG_PCI_PCIE
 
 	  When in doubt, say N.
 
+config HOTPLUG_PCI_PCIELW
+	tristate "PCI Express link watch driver"
+	depends on HOTPLUG_PCI && PCIEPORTBUS
+	help
+	  Say Y here if you have a motherboard that supports PCI Express Native
+	  Hotplug
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called pcielw
+
+	  When in doubt, say N.
+
 source "drivers/pci/pcie/aer/Kconfig"
 
 #
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [DMA Engine]     [Linux Coverity]     [Linux USB]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [Greybus]

  Powered by Linux