The patch exports functions to be used by new ioctl commands, which will be introduced in subsequent patch, to support EEH functinality for VFIO PCI device. Signed-off-by: Gavin Shan <gwshan@xxxxxxxxxxxxxxxxxx> --- arch/powerpc/include/asm/eeh.h | 15 +++ arch/powerpc/kernel/eeh.c | 286 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 301 insertions(+) diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 34a2d83..ffc95e7 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -26,6 +26,7 @@ #include <linux/string.h> #include <linux/time.h> +struct iommu_table; struct pci_dev; struct pci_bus; struct device_node; @@ -191,6 +192,8 @@ enum { #define EEH_OPT_ENABLE 1 /* EEH enable */ #define EEH_OPT_THAW_MMIO 2 /* MMIO enable */ #define EEH_OPT_THAW_DMA 3 /* DMA enable */ +#define EEH_OPT_GET_PE_ADDR 0 /* Get PE addr */ +#define EEH_OPT_GET_PE_MODE 1 /* Get PE mode */ #define EEH_STATE_UNAVAILABLE (1 << 0) /* State unavailable */ #define EEH_STATE_NOT_SUPPORT (1 << 1) /* EEH not supported */ #define EEH_STATE_RESET_ACTIVE (1 << 2) /* Active reset */ @@ -198,6 +201,11 @@ enum { #define EEH_STATE_DMA_ACTIVE (1 << 4) /* Active DMA */ #define EEH_STATE_MMIO_ENABLED (1 << 5) /* MMIO enabled */ #define EEH_STATE_DMA_ENABLED (1 << 6) /* DMA enabled */ +#define EEH_PE_STATE_NORMAL 0 /* Normal state */ +#define EEH_PE_STATE_RESET 1 /* PE reset */ +#define EEH_PE_STATE_STOPPED_IO_DMA 2 /* Stopped */ +#define EEH_PE_STATE_STOPPED_DMA 4 /* Stopped DMA */ +#define EEH_PE_STATE_UNAVAIL 5 /* Unavailable */ #define EEH_RESET_DEACTIVATE 0 /* Deactivate the PE reset */ #define EEH_RESET_HOT 1 /* Hot reset */ #define EEH_RESET_FUNDAMENTAL 3 /* Fundamental reset */ @@ -305,6 +313,13 @@ void eeh_add_device_late(struct pci_dev *); void eeh_add_device_tree_late(struct pci_bus *); void eeh_add_sysfs_files(struct pci_bus *); void eeh_remove_device(struct pci_dev *); +int eeh_dev_open(struct pci_dev *pdev); +void eeh_dev_release(struct pci_dev *pdev); +struct eeh_pe *eeh_iommu_table_to_pe(struct iommu_table *tbl); +int eeh_pe_set_option(struct eeh_pe *pe, int option); +int eeh_pe_get_state(struct eeh_pe *pe); +int eeh_pe_reset(struct eeh_pe *pe, int option); +int eeh_pe_configure(struct eeh_pe *pe); /** * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure. diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 3bc8b12..30693c1 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -40,6 +40,7 @@ #include <asm/eeh.h> #include <asm/eeh_event.h> #include <asm/io.h> +#include <asm/iommu.h> #include <asm/machdep.h> #include <asm/ppc-pci.h> #include <asm/rtas.h> @@ -108,6 +109,9 @@ struct eeh_ops *eeh_ops = NULL; /* Lock to avoid races due to multiple reports of an error */ DEFINE_RAW_SPINLOCK(confirm_error_lock); +/* Lock to protect passed flags */ +static DEFINE_MUTEX(eeh_dev_mutex); + /* Buffer for reporting pci register dumps. Its here in BSS, and * not dynamically alloced, so that it ends up in RMO where RTAS * can access it. @@ -1106,6 +1110,288 @@ void eeh_remove_device(struct pci_dev *dev) edev->mode &= ~EEH_DEV_SYSFS; } +/** + * eeh_dev_open - Mark EEH device and PE as passed through + * @pdev: PCI device + * + * Mark the indicated EEH device and PE as passed through. + * In the result, the EEH errors detected on the PE won't be + * reported. The owner of the device will be responsible for + * detection and recovery. + */ +int eeh_dev_open(struct pci_dev *pdev) +{ + struct eeh_dev *edev; + + mutex_lock(&eeh_dev_mutex); + + /* No PCI device ? */ + if (!pdev) { + mutex_unlock(&eeh_dev_mutex); + return -ENODEV; + } + + /* No EEH device ? */ + edev = pci_dev_to_eeh_dev(pdev); + if (!edev || !edev->pe) { + mutex_unlock(&eeh_dev_mutex); + return -ENODEV; + } + + eeh_dev_set_passed(edev, true); + eeh_pe_set_passed(edev->pe, true); + mutex_unlock(&eeh_dev_mutex); + + return 0; +} +EXPORT_SYMBOL_GPL(eeh_dev_open); + +/** + * eeh_dev_release - Reclaim the ownership of EEH device + * @pdev: PCI device + * + * Reclaim ownership of EEH device, potentially the corresponding + * PE. In the result, the EEH errors detected on the PE will be + * reported and handled as usual. + */ +void eeh_dev_release(struct pci_dev *pdev) +{ + bool release_pe = true; + struct eeh_pe *pe = NULL; + struct eeh_dev *tmp, *edev; + + mutex_lock(&eeh_dev_mutex); + + /* No PCI device ? */ + if (!pdev) { + mutex_unlock(&eeh_dev_mutex); + return; + } + + /* No EEH device ? */ + edev = pci_dev_to_eeh_dev(pdev); + if (!edev || !eeh_dev_passed(edev) || + !edev->pe || !eeh_pe_passed(pe)) { + mutex_unlock(&eeh_dev_mutex); + return; + } + + /* Release device */ + pe = edev->pe; + eeh_dev_set_passed(edev, false); + + /* Release PE */ + eeh_pe_for_each_dev(pe, edev, tmp) { + if (eeh_dev_passed(edev)) { + release_pe = false; + break; + } + } + + if (release_pe) + eeh_pe_set_passed(pe, false); + + mutex_unlock(&eeh_dev_mutex); +} +EXPORT_SYMBOL(eeh_dev_release); + +/** + * eeh_iommu_table_to_pe - Convert IOMMU table to EEH PE + * @tbl: IOMMU table + * + * The routine is called to convert IOMMU table to EEH PE. + */ +struct eeh_pe *eeh_iommu_table_to_pe(struct iommu_table *tbl) +{ + struct pci_dev *pdev = NULL; + struct eeh_dev *edev; + bool found = false; + + /* No IOMMU table ? */ + if (!tbl) + return NULL; + + /* No PCI device ? */ + for_each_pci_dev(pdev) { + if (get_iommu_table_base(&pdev->dev) == tbl) { + found = true; + break; + } + } + if (!found) + return NULL; + + /* No EEH device or PE ? */ + edev = pci_dev_to_eeh_dev(pdev); + if (!edev || !edev->pe) + return NULL; + + return edev->pe; +} + +/** + * eeh_pe_set_option - Set options for the indicated PE + * @pe: EEH PE + * @option: requested option + * + * The routine is called to enable or disable EEH functionality + * on the indicated PE, to enable IO or DMA for the frozen PE. + */ +int eeh_pe_set_option(struct eeh_pe *pe, int option) +{ + int ret = 0; + + /* Invalid PE ? */ + if (!pe) + return -ENODEV; + + /* + * EEH functionality could possibly be disabled, just + * return error for the case. And the EEH functinality + * isn't expected to be disabled on one specific PE. + */ + switch (option) { + case EEH_OPT_ENABLE: + if (eeh_enabled()) + break; + ret = -EIO; + break; + case EEH_OPT_DISABLE: + break; + case EEH_OPT_THAW_MMIO: + case EEH_OPT_THAW_DMA: + if (!eeh_ops || !eeh_ops->set_option) { + ret = -ENOENT; + break; + } + + ret = eeh_ops->set_option(pe, option); + break; + default: + pr_debug("%s: Option %d out of range (%d, %d)\n", + __func__, option, EEH_OPT_DISABLE, EEH_OPT_THAW_DMA); + ret = -EINVAL; + } + + return ret; +} +EXPORT_SYMBOL_GPL(eeh_pe_set_option); + +/** + * eeh_pe_get_state - Retrieve PE's state + * @pe: EEH PE + * + * Retrieve the PE's state, which includes 3 aspects: enabled + * DMA, enabled IO and asserted reset. + */ +int eeh_pe_get_state(struct eeh_pe *pe) +{ + int result, ret = 0; + + /* Existing PE ? */ + if (!pe) + return -ENODEV; + + if (!eeh_ops || !eeh_ops->get_state) + return -ENOENT; + + result = eeh_ops->get_state(pe, NULL); + if (!(result & EEH_STATE_RESET_ACTIVE) && + (result & EEH_STATE_DMA_ENABLED) && + (result & EEH_STATE_MMIO_ENABLED)) + ret = EEH_PE_STATE_NORMAL; + else if (result & EEH_STATE_RESET_ACTIVE) + ret = EEH_PE_STATE_RESET; + else if (!(result & EEH_STATE_RESET_ACTIVE) && + !(result & EEH_STATE_DMA_ENABLED) && + !(result & EEH_STATE_MMIO_ENABLED)) + ret = EEH_PE_STATE_STOPPED_IO_DMA; + else if (!(result & EEH_STATE_RESET_ACTIVE) && + (result & EEH_STATE_DMA_ENABLED) && + !(result & EEH_STATE_MMIO_ENABLED)) + ret = EEH_PE_STATE_STOPPED_DMA; + else + ret = EEH_PE_STATE_UNAVAIL; + + return ret; +} +EXPORT_SYMBOL_GPL(eeh_pe_get_state); + +/** + * eeh_pe_reset - Issue PE reset according to specified type + * @pe: EEH PE + * @option: reset type + * + * The routine is called to reset the specified PE with the + * indicated type, either fundamental reset or hot reset. + * PE reset is the most important part for error recovery. + */ +int eeh_pe_reset(struct eeh_pe *pe, int option) +{ + int ret = 0; + + /* Invalid PE ? */ + if (!pe) + return -ENODEV; + + if (!eeh_ops || !eeh_ops->set_option || !eeh_ops->reset) + return -ENOENT; + + switch (option) { + case EEH_RESET_DEACTIVATE: + ret = eeh_ops->reset(pe, option); + if (ret) + break; + + /* + * The PE is still in frozen state and we need to clear + * that. It's good to clear frozen state after deassert + * to avoid messy IO access during reset, which might + * cause recursive frozen PE. + */ + ret = eeh_ops->set_option(pe, EEH_OPT_THAW_MMIO); + if (!ret) + ret = eeh_ops->set_option(pe, EEH_OPT_THAW_DMA); + if (!ret) + eeh_pe_state_clear(pe, EEH_PE_ISOLATED); + break; + case EEH_RESET_HOT: + case EEH_RESET_FUNDAMENTAL: + ret = eeh_ops->reset(pe, option); + break; + default: + pr_debug("%s: Unsupported option %d\n", + __func__, option); + ret = -EINVAL; + } + + return ret; +} +EXPORT_SYMBOL_GPL(eeh_pe_reset); + +/** + * eeh_pe_configure - Configure PCI bridges after PE reset + * @pe: EEH PE + * + * The routine is called to restore the PCI config space for + * those PCI devices, especially PCI bridges affected by PE + * reset issued previously. + */ +int eeh_pe_configure(struct eeh_pe *pe) +{ + int ret = 0; + + /* Invalid PE ? */ + if (!pe) + return -ENODEV; + + /* Restore config space for the affected devices */ + eeh_pe_restore_bars(pe); + + return ret; +} +EXPORT_SYMBOL_GPL(eeh_pe_configure); + static int proc_eeh_show(struct seq_file *m, void *v) { if (!eeh_enabled()) { -- 1.8.3.2 -- To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html