[PATCH][REPOST] PCI: Enable Function Level Reset(FLR) for PCI-E

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



(Dear Matthew, It have been 3 weeks and I didn't get any comments, even with
ping more than once. So can we think this is good enough? This also target for
2.6.28...)

The FLR mechanism enables software to quiesce and reset Endpoint hardware
with Function-level granularity.

Current the usage model for VT-d support in KVM. We'd better to do FLR
before assigning device to the guest.

This can also be used with other purpose. Please refer to PCI Express spec
chapter 6.6.2.

The patch contain two functions. pcie_reset_function() is the common one to
be called, contain some action to quiesce device.
pcie_execute_reset_function() just execute Function Level Reset.

Updated the patch according to Matthew Wilcox's comments, and reduce the retry
time to 1s according to Yu's comment.

Signed-off-by: Sheng Yang <sheng@xxxxxxxxxxxxxxx>
CC: Matthew Wilcox <matthew@xxxxxx>
---
 drivers/pci/pci.c        |  100 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/pci.h      |    2 +
 include/linux/pci_regs.h |    2 +
 3 files changed, 104 insertions(+), 0 deletions(-)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index c9884bb..1a85164 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -18,6 +18,7 @@
 #include <linux/log2.h>
 #include <linux/pci-aspm.h>
 #include <linux/pm_wakeup.h>
+#include <linux/interrupt.h>
 #include <asm/dma.h>	/* isa_dma_bridge_buggy */
 #include "pci.h"
 
@@ -1691,6 +1692,105 @@ EXPORT_SYMBOL(pci_set_dma_seg_boundary);
 #endif
 
 /**
+ * pcie_reset_function() - quiesce device and execute Function Level Reset
+ * @dev: Device to reset
+ *
+ * Function Level Reset is a feature introduced by the PCIe spec version
+ * 2.0.  It allows one function of a multifunction PCI device to be
+ * reset without affecting the other functions in the same device.
+ *
+ * This function would do some more actions to quiesce the device.
+ *
+ * Returns 0 if the device was successfully reset or -ENOTTY if
+ * the device doesn't support FLR.
+ *
+ * If the device support FLR, it must guarantee the execution would finished in
+ * 100ms(after reset bit was set) and won't fail.
+ */
+int pcie_reset_function(struct pci_dev *dev)
+{
+	u32 cap;
+	int exppos = pci_find_capability(dev, PCI_CAP_ID_EXP);
+	int r;
+
+	if (!exppos)
+		return -ENOTTY;
+	pci_read_config_dword(dev, exppos + PCI_EXP_DEVCAP, &cap);
+	if (!(cap & PCI_EXP_DEVCAP_FLR))
+		return -ENOTTY;
+
+	if (!dev->msi_enabled && !dev->msix_enabled)
+		disable_irq(dev->irq);
+	pci_save_state(dev);
+
+	r = pcie_execute_reset_function(dev);
+
+	pci_restore_state(dev);
+	if (!dev->msi_enabled && !dev->msix_enabled)
+		enable_irq(dev->irq);
+
+	return r;
+}
+EXPORT_SYMBOL(pcie_reset_function);
+
+/**
+ * pcie_execute_reset_function() - execute Function Level Reset
+ * @dev: Device to reset
+ *
+ * Resetting the device will make the contents of PCI configuration space
+ * random, so any caller of this must be prepared to reinitialise the
+ * device including MSI, bus mastering, BARs, decoding IO and memory spaces,
+ * etc.
+ *
+ * If some common action needed to quiesce the device, pcie_reset_function() is
+ * another option.
+ *
+ * Returns 0 if the device was successfully reset or -ENOTTY if
+ * the device doesn't support FLR.
+ *
+ * If the device support FLR, it must guarantee the execution would finished in
+ * 100ms(after reset bit was set) and won't fail.
+ */
+int pcie_execute_reset_function(struct pci_dev *dev)
+{
+	u16 status;
+	u32 cap;
+	int exppos = pci_find_capability(dev, PCI_CAP_ID_EXP);
+
+	if (!exppos)
+		return -ENOTTY;
+	pci_read_config_dword(dev, exppos + PCI_EXP_DEVCAP, &cap);
+	if (!(cap & PCI_EXP_DEVCAP_FLR))
+		return -ENOTTY;
+
+	pci_block_user_cfg_access(dev);
+
+	/* Quiesce the device completely */
+	pci_write_config_word(dev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);
+
+	/* Wait for Transaction Pending bit clean */
+	msleep(100);
+	pci_read_config_word(dev, exppos + PCI_EXP_DEVSTA, &status);
+	if (status & PCI_EXP_DEVSTA_TRPND) {
+		dev_info(&dev->dev, "Function reset incomplete after 100ms, "
+			"sleeping for 1 seconds\n");
+		ssleep(1);
+		pci_read_config_word(dev, exppos + PCI_EXP_DEVSTA, &status);
+		if (status & PCI_EXP_DEVSTA_TRPND)
+			dev_info(&dev->dev, "Function reset still incomplete "
+				"after 1s, reset anyway\n");
+	}
+
+	pci_write_config_word(dev, exppos + PCI_EXP_DEVCTL,
+				PCI_EXP_DEVCTL_BCR_FLR);
+	mdelay(100);
+
+	pci_unblock_user_cfg_access(dev);
+	return 0;
+}
+EXPORT_SYMBOL(pcie_execute_reset_function);
+
+/**
  * pcix_get_max_mmrbc - get PCI-X maximum designed memory read byte count
  * @dev: PCI device to query
  *
diff --git a/include/linux/pci.h b/include/linux/pci.h
index c0e1400..a953ad9 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -626,6 +626,8 @@ int pcix_get_mmrbc(struct pci_dev *dev);
 int pcix_set_mmrbc(struct pci_dev *dev, int mmrbc);
 int pcie_get_readrq(struct pci_dev *dev);
 int pcie_set_readrq(struct pci_dev *dev, int rq);
+int pcie_reset_function(struct pci_dev *dev);
+int pcie_execute_reset_function(struct pci_dev *dev);
 void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno);
 int __must_check pci_assign_resource(struct pci_dev *dev, int i);
 int pci_select_bars(struct pci_dev *dev, unsigned long flags);
diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h
index 450684f..ee55f27 100644
--- a/include/linux/pci_regs.h
+++ b/include/linux/pci_regs.h
@@ -377,6 +377,7 @@
 #define  PCI_EXP_DEVCAP_RBER	0x8000	/* Role-Based Error Reporting */
 #define  PCI_EXP_DEVCAP_PWR_VAL	0x3fc0000 /* Slot Power Limit Value */
 #define  PCI_EXP_DEVCAP_PWR_SCL	0xc000000 /* Slot Power Limit Scale */
+#define  PCI_EXP_DEVCAP_FLR     0x10000000 /* Function Level Reset */
 #define PCI_EXP_DEVCTL		8	/* Device Control */
 #define  PCI_EXP_DEVCTL_CERE	0x0001	/* Correctable Error Reporting En. */
 #define  PCI_EXP_DEVCTL_NFERE	0x0002	/* Non-Fatal Error Reporting Enable */
@@ -389,6 +390,7 @@
 #define  PCI_EXP_DEVCTL_AUX_PME	0x0400	/* Auxiliary Power PM Enable */
 #define  PCI_EXP_DEVCTL_NOSNOOP_EN 0x0800  /* Enable No Snoop */
 #define  PCI_EXP_DEVCTL_READRQ	0x7000	/* Max_Read_Request_Size */
+#define  PCI_EXP_DEVCTL_BCR_FLR 0x8000  /* Bridge Configuration Retry / FLR */
 #define PCI_EXP_DEVSTA		10	/* Device Status */
 #define  PCI_EXP_DEVSTA_CED	0x01	/* Correctable Error Detected */
 #define  PCI_EXP_DEVSTA_NFED	0x02	/* Non-Fatal Error Detected */
-- 
1.5.4.5

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [DMA Engine]     [Linux Coverity]     [Linux USB]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [Greybus]

  Powered by Linux