On Fri, May 15, 2015 at 01:46:24PM +0800, Wei Yang wrote: >Compared with Bus PE, VF PE just has one single pci function. This >introduces the difference of error handling on a VF PE. > >For example in the hotplug case, EEH needs to remove and re-create the VF >properly. In the case when PF's error_detected() disable SRIOV, this patch >introduces a flag to mark the eeh_dev of a VF to avoid the slot_reset() and >resume(). Since the FW is not ware of the VF, this patch handles the VF >restore/reset in kernel directly. > >This patch is to handle the VF PE properly in these cases. > >Signed-off-by: Wei Yang <weiyang@xxxxxxxxxxxxxxxxxx> Acked-by: Gavin Shan <gwshan@xxxxxxxxxxxxxxxxxx> With following things fixed: >--- > arch/powerpc/include/asm/eeh.h | 1 + > arch/powerpc/kernel/eeh.c | 1 + > arch/powerpc/kernel/eeh_driver.c | 103 ++++++++++++++++++++++++++++++-------- > arch/powerpc/kernel/eeh_pe.c | 3 +- > 4 files changed, 85 insertions(+), 23 deletions(-) > >diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h >index 3d64cf3..d24382c 100644 >--- a/arch/powerpc/include/asm/eeh.h >+++ b/arch/powerpc/include/asm/eeh.h >@@ -140,6 +140,7 @@ struct eeh_dev { > struct pci_controller *phb; /* Associated PHB */ > struct pci_dn *pdn; /* Associated PCI device node */ > struct pci_dev *pdev; /* Associated PCI device */ >+ int in_error; /* Error flag for eeh_dev */ > struct pci_dev *physfn; /* Associated PF PORT */ > struct pci_bus *bus; /* PCI bus for partial hotplug */ > }; >diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c >index 221e280..077c3d1 100644 >--- a/arch/powerpc/kernel/eeh.c >+++ b/arch/powerpc/kernel/eeh.c >@@ -1226,6 +1226,7 @@ void eeh_remove_device(struct pci_dev *dev) > * from the parent PE during the BAR resotre. > */ > edev->pdev = NULL; >+ edev->in_error = 0; Could you please put detailed comments aboug the the usage of "in_error" here? I may look into it later to remove it. For now, you don't need to do that since we're almost run out of time. > dev->dev.archdata.edev = NULL; > if (!(edev->pe->state & EEH_PE_KEEP)) > eeh_rmv_from_parent_pe(edev); >diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c >index 89eb4bc..292089e 100644 >--- a/arch/powerpc/kernel/eeh_driver.c >+++ b/arch/powerpc/kernel/eeh_driver.c >@@ -211,6 +211,7 @@ static void *eeh_report_error(void *data, void *userdata) > if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; > if (*res == PCI_ERS_RESULT_NONE) *res = rc; > >+ edev->in_error = 1; > eeh_pcid_put(dev); > return NULL; > } >@@ -282,7 +283,8 @@ static void *eeh_report_reset(void *data, void *userdata) > > if (!driver->err_handler || > !driver->err_handler->slot_reset || >- (edev->mode & EEH_DEV_NO_HANDLER)) { >+ (edev->mode & EEH_DEV_NO_HANDLER) || >+ (!edev->in_error)) { > eeh_pcid_put(dev); > return NULL; > } >@@ -339,14 +341,16 @@ static void *eeh_report_resume(void *data, void *userdata) > > if (!driver->err_handler || > !driver->err_handler->resume || >- (edev->mode & EEH_DEV_NO_HANDLER)) { >+ (edev->mode & EEH_DEV_NO_HANDLER) || >+ (!edev->in_error)) { > edev->mode &= ~EEH_DEV_NO_HANDLER; >- eeh_pcid_put(dev); >- return NULL; >+ goto out; > } > > driver->err_handler->resume(dev); > >+out: >+ edev->in_error = 0; > eeh_pcid_put(dev); > return NULL; > } >@@ -386,12 +390,40 @@ static void *eeh_report_failure(void *data, void *userdata) > return NULL; > } > >+#ifdef CONFIG_PCI_IOV >+static void *eeh_add_virt_device(void *data, void *userdata) >+{ >+ struct pci_driver *driver; >+ struct eeh_dev *edev = (struct eeh_dev *)data; >+ struct pci_dev *dev = eeh_dev_to_pci_dev(edev); >+ struct pci_dn *pdn = eeh_dev_to_pdn(edev); >+ >+ if (!(edev->physfn)) { >+ pr_warn("%s: EEH dev %04x:%02x:%02x:%01x not for VF\n", >+ __func__, edev->phb->global_number, pdn->busno, >+ PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); %04x:%02x:%02x.%01x The connection symbol between device/function number is ".", not ":". >+ return NULL; >+ } >+ >+ driver = eeh_pcid_get(dev); >+ if (driver) { >+ eeh_pcid_put(dev); >+ if (driver->err_handler) >+ return NULL; >+ } >+ >+ pci_iov_virtfn_add(edev->physfn, pdn->vf_index, 0); >+ return NULL; >+} >+#endif /* CONFIG_PCI_IOV */ >+ > static void *eeh_rmv_device(void *data, void *userdata) > { > struct pci_driver *driver; > struct eeh_dev *edev = (struct eeh_dev *)data; > struct pci_dev *dev = eeh_dev_to_pci_dev(edev); > int *removed = (int *)userdata; >+ struct pci_dn *pdn = eeh_dev_to_pdn(edev); > > /* > * Actually, we should remove the PCI bridges as well. >@@ -416,7 +448,7 @@ static void *eeh_rmv_device(void *data, void *userdata) > driver = eeh_pcid_get(dev); > if (driver) { > eeh_pcid_put(dev); >- if (driver->err_handler) >+ if (removed && driver->err_handler) > return NULL; > } > >@@ -425,11 +457,18 @@ static void *eeh_rmv_device(void *data, void *userdata) > pci_name(dev)); > edev->bus = dev->bus; > edev->mode |= EEH_DEV_DISCONNECTED; >- (*removed)++; >- >- pci_lock_rescan_remove(); >- pci_stop_and_remove_bus_device(dev); >- pci_unlock_rescan_remove(); >+ if (removed) >+ (*removed)++; >+ >+ if (edev->physfn) { >+ pci_iov_virtfn_remove(edev->physfn, pdn->vf_index, 0); >+ edev->pdev = NULL; >+ pdn->pe_number = IODA_INVALID_PE; >+ } else { >+ pci_lock_rescan_remove(); >+ pci_stop_and_remove_bus_device(dev); >+ pci_unlock_rescan_remove(); >+ } > > return NULL; > } >@@ -548,6 +587,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) > struct pci_bus *frozen_bus = eeh_pe_bus_get(pe); > struct timeval tstamp; > int cnt, rc, removed = 0; >+ struct eeh_dev *edev; > > /* pcibios will clear the counter; save the value */ > cnt = pe->freeze_count; >@@ -561,12 +601,15 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) > */ > eeh_pe_state_mark(pe, EEH_PE_KEEP); > if (bus) { >- pci_lock_rescan_remove(); >- pcibios_remove_pci_devices(bus); >- pci_unlock_rescan_remove(); >- } else if (frozen_bus) { >+ if (pe->type & EEH_PE_VF) >+ eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); >+ else { >+ pci_lock_rescan_remove(); >+ pcibios_remove_pci_devices(bus); >+ pci_unlock_rescan_remove(); >+ } >+ } else if (frozen_bus) > eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed); >- } > > /* > * Reset the pci controller. (Asserts RST#; resets config space). >@@ -607,14 +650,26 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) > * PE. We should disconnect it so the binding can be > * rebuilt when adding PCI devices. > */ >+ edev = list_first_entry(&pe->edevs, struct eeh_dev, list); > eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL); >- pcibios_add_pci_devices(bus); >+#ifdef CONFIG_PCI_IOV >+ if (pe->type & EEH_PE_VF) >+ eeh_add_virt_device(edev, NULL); >+ else >+#endif >+ pcibios_add_pci_devices(bus); > } else if (frozen_bus && removed) { > pr_info("EEH: Sleep 5s ahead of partial hotplug\n"); > ssleep(5); > >+ edev = list_first_entry(&pe->edevs, struct eeh_dev, list); > eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL); >- pcibios_add_pci_devices(frozen_bus); >+#ifdef CONFIG_PCI_IOV >+ if (pe->type & EEH_PE_VF) >+ eeh_add_virt_device(edev, NULL); >+ else >+#endif >+ pcibios_add_pci_devices(frozen_bus); > } > eeh_pe_state_clear(pe, EEH_PE_KEEP); > >@@ -792,11 +847,15 @@ perm_error: > * the their PCI config any more. > */ > if (frozen_bus) { >- eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); >- >- pci_lock_rescan_remove(); >- pcibios_remove_pci_devices(frozen_bus); >- pci_unlock_rescan_remove(); >+ if (pe->type & EEH_PE_VF) { >+ eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); >+ eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); >+ } else { >+ eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); >+ pci_lock_rescan_remove(); >+ pcibios_remove_pci_devices(frozen_bus); >+ pci_unlock_rescan_remove(); >+ } > } > } > >diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c >index 260a701..5cde950 100644 >--- a/arch/powerpc/kernel/eeh_pe.c >+++ b/arch/powerpc/kernel/eeh_pe.c >@@ -914,7 +914,8 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe) > if (pe->type & EEH_PE_PHB) { > bus = pe->phb->bus; > } else if (pe->type & EEH_PE_BUS || >- pe->type & EEH_PE_DEVICE) { >+ pe->type & EEH_PE_DEVICE || >+ pe->type & EEH_PE_VF) { > if (pe->bus) { > bus = pe->bus; > goto out; >-- >1.7.9.5 > -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html