The FLR mechanism enables software to quiesce and reset Endpoint hardware with Function-level granularity. Current the usage model for VT-d support in KVM. We'd better to do FLR before assigning device to the guest. This can also be used with other purpose. Please refer to PCI Express spec chapter 6.6.2. The patch contain two functions. pcie_reset_function() is the common one to be called, contain some action to quiesce device. pcie_execute_reset_function() just execute Function Level Reset. Updated the patch according Matthew Wilcox's comments. Signed-off-by: Sheng Yang <sheng.yang@xxxxxxxxx> CC: Matthew Wilcox <matthew@xxxxxx> --- drivers/pci/pci.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/pci.h | 2 + include/linux/pci_regs.h | 2 + 3 files changed, 104 insertions(+), 0 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index c9884bb..1a85164 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -18,6 +18,7 @@ #include <linux/log2.h> #include <linux/pci-aspm.h> #include <linux/pm_wakeup.h> +#include <linux/interrupt.h> #include <asm/dma.h> /* isa_dma_bridge_buggy */ #include "pci.h" @@ -1691,6 +1692,105 @@ EXPORT_SYMBOL(pci_set_dma_seg_boundary); #endif /** + * pcie_reset_function() - quiesce device and execute Function Level Reset + * @dev: Device to reset + * + * Function Level Reset is a feature introduced by the PCIe spec version + * 2.0. It allows one function of a multifunction PCI device to be + * reset without affecting the other functions in the same device. + * + * This function would do some more actions to quiesce the device. + * + * Returns 0 if the device was successfully reset or -ENOTTY if + * the device doesn't support FLR. + * + * If the device support FLR, it must guarantee the execution would finished in + * 100ms(after reset bit was set) and won't fail. + */ +int pcie_reset_function(struct pci_dev *dev) +{ + u32 cap; + int exppos = pci_find_capability(dev, PCI_CAP_ID_EXP); + int r; + + if (!exppos) + return -ENOTTY; + pci_read_config_dword(dev, exppos + PCI_EXP_DEVCAP, &cap); + if (!(cap & PCI_EXP_DEVCAP_FLR)) + return -ENOTTY; + + if (!dev->msi_enabled && !dev->msix_enabled) + disable_irq(dev->irq); + pci_save_state(dev); + + r = pcie_execute_reset_function(dev); + + pci_restore_state(dev); + if (!dev->msi_enabled && !dev->msix_enabled) + enable_irq(dev->irq); + + return r; +} +EXPORT_SYMBOL(pcie_reset_function); + +/** + * pcie_execute_reset_function() - execute Function Level Reset + * @dev: Device to reset + * + * Resetting the device will make the contents of PCI configuration space + * random, so any caller of this must be prepared to reinitialise the + * device including MSI, bus mastering, BARs, decoding IO and memory spaces, + * etc. + * + * If some common action needed to quiesce the device, pcie_reset_function() is + * another option. + * + * Returns 0 if the device was successfully reset or -ENOTTY if + * the device doesn't support FLR. + * + * If the device support FLR, it must guarantee the execution would finished in + * 100ms(after reset bit was set) and won't fail. + */ +int pcie_execute_reset_function(struct pci_dev *dev) +{ + u16 status; + u32 cap; + int exppos = pci_find_capability(dev, PCI_CAP_ID_EXP); + + if (!exppos) + return -ENOTTY; + pci_read_config_dword(dev, exppos + PCI_EXP_DEVCAP, &cap); + if (!(cap & PCI_EXP_DEVCAP_FLR)) + return -ENOTTY; + + pci_block_user_cfg_access(dev); + + /* Quiesce the device completely */ + pci_write_config_word(dev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE); + + /* Wait for Transaction Pending bit clean */ + msleep(100); + pci_read_config_word(dev, exppos + PCI_EXP_DEVSTA, &status); + if (status & PCI_EXP_DEVSTA_TRPND) { + dev_info(&dev->dev, "Function reset incomplete after 100ms, " + "sleeping for 5 seconds\n"); + ssleep(5); + pci_read_config_word(dev, exppos + PCI_EXP_DEVSTA, &status); + if (status & PCI_EXP_DEVSTA_TRPND) + dev_info(&dev->dev, "Function reset still incomplete " + "after 5s, reset anyway\n"); + } + + pci_write_config_word(dev, exppos + PCI_EXP_DEVCTL, + PCI_EXP_DEVCTL_BCR_FLR); + mdelay(100); + + pci_unblock_user_cfg_access(dev); + return 0; +} +EXPORT_SYMBOL(pcie_execute_reset_function); + +/** * pcix_get_max_mmrbc - get PCI-X maximum designed memory read byte count * @dev: PCI device to query * diff --git a/include/linux/pci.h b/include/linux/pci.h index c0e1400..a953ad9 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -626,6 +626,8 @@ int pcix_get_mmrbc(struct pci_dev *dev); int pcix_set_mmrbc(struct pci_dev *dev, int mmrbc); int pcie_get_readrq(struct pci_dev *dev); int pcie_set_readrq(struct pci_dev *dev, int rq); +int pcie_reset_function(struct pci_dev *dev); +int pcie_execute_reset_function(struct pci_dev *dev); void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno); int __must_check pci_assign_resource(struct pci_dev *dev, int i); int pci_select_bars(struct pci_dev *dev, unsigned long flags); diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index 450684f..ee55f27 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -377,6 +377,7 @@ #define PCI_EXP_DEVCAP_RBER 0x8000 /* Role-Based Error Reporting */ #define PCI_EXP_DEVCAP_PWR_VAL 0x3fc0000 /* Slot Power Limit Value */ #define PCI_EXP_DEVCAP_PWR_SCL 0xc000000 /* Slot Power Limit Scale */ +#define PCI_EXP_DEVCAP_FLR 0x10000000 /* Function Level Reset */ #define PCI_EXP_DEVCTL 8 /* Device Control */ #define PCI_EXP_DEVCTL_CERE 0x0001 /* Correctable Error Reporting En. */ #define PCI_EXP_DEVCTL_NFERE 0x0002 /* Non-Fatal Error Reporting Enable */ @@ -389,6 +390,7 @@ #define PCI_EXP_DEVCTL_AUX_PME 0x0400 /* Auxiliary Power PM Enable */ #define PCI_EXP_DEVCTL_NOSNOOP_EN 0x0800 /* Enable No Snoop */ #define PCI_EXP_DEVCTL_READRQ 0x7000 /* Max_Read_Request_Size */ +#define PCI_EXP_DEVCTL_BCR_FLR 0x8000 /* Bridge Configuration Retry / FLR */ #define PCI_EXP_DEVSTA 10 /* Device Status */ #define PCI_EXP_DEVSTA_CED 0x01 /* Correctable Error Detected */ #define PCI_EXP_DEVSTA_NFED 0x02 /* Non-Fatal Error Detected */ -- 1.5.4.5 -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html