On Fri, Oct 30, 2015 at 03:11:20PM +1100, Alexey Kardashevskiy wrote: >On 10/26/2015 02:15 PM, Wei Yang wrote: >>PEs for VFs don't have primary bus. So they have to have their own reset >>backend, which is used during EEH recovery. The patch implements the reset >>backend for VF's PE by issuing FLR or AF FLR to the VFs, which are contained >>in the PE. >> >>[gwshan: changelog and code refactoring] >>Signed-off-by: Wei Yang <weiyang@xxxxxxxxxxxxxxxxxx> >>Acked-by: Gavin Shan <gwshan@xxxxxxxxxxxxxxxxxx> >>--- >> arch/powerpc/include/asm/eeh.h | 1 + >> arch/powerpc/platforms/powernv/eeh-powernv.c | 134 ++++++++++++++++++++++++++- >> 2 files changed, 134 insertions(+), 1 deletion(-) >> >>diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h >>index ec21f8f..331c856 100644 >>--- a/arch/powerpc/include/asm/eeh.h >>+++ b/arch/powerpc/include/asm/eeh.h >>@@ -136,6 +136,7 @@ struct eeh_dev { >> int pcix_cap; /* Saved PCIx capability */ >> int pcie_cap; /* Saved PCIe capability */ >> int aer_cap; /* Saved AER capability */ >>+ int af_cap; /* Saved AF capability */ >> struct eeh_pe *pe; /* Associated PE */ >> struct list_head list; /* Form link list in the PE */ >> struct pci_controller *phb; /* Associated PHB */ >>diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c >>index cfd55dd..017cd72 100644 >>--- a/arch/powerpc/platforms/powernv/eeh-powernv.c >>+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c >>@@ -404,6 +404,7 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data) >> edev->pcix_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_PCIX); >> edev->pcie_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_EXP); >> edev->aer_cap = pnv_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR); >>+ edev->af_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_AF); >> if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) { >> edev->mode |= EEH_DEV_BRIDGE; >> if (edev->pcie_cap) { >>@@ -893,6 +894,127 @@ static int pnv_eeh_bridge_reset(struct pci_dev *dev, int option) >> return 0; >> } >> >>+static void pnv_eeh_wait_for_pending(struct pci_dn *pdn, int pos, >>+ u16 mask, bool af_flr_rst) >>+{ >>+ struct eeh_dev *edev = pdn_to_eeh_dev(pdn); >>+ int status, i; >>+ >>+ /* Wait for Transaction Pending bit to be cleared */ >>+ for (i = 0; i < 4; i++) { >>+ eeh_ops->read_config(pdn, pos, 2, &status); > > >gcc should have complained on using uninitialized @status here. > I remove the obj file and re-compile the file, not the warning. And took a look at other places where read_config() is called. The laster parameter is not initialized before called. You see the error during build? > >>+ if (!(status & mask)) >>+ return; >>+ >>+ msleep((1 << i) * 100); >>+ } >>+ >>+ pr_warn("%s: Pending transaction while issuing %s FLR to " >>+ "%04x:%02x:%02x.%01x\n", > >Do not wrap user-visible strings. > Will change this. > >>+ __func__, af_flr_rst ? "AF" : "", >>+ edev->phb->global_number, pdn->busno, >>+ PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); >>+} >>+ >>+static int pnv_eeh_do_flr(struct pci_dn *pdn, int option) >>+{ >>+ struct eeh_dev *edev = pdn_to_eeh_dev(pdn); >>+ u32 reg; >>+ >>+ if (!edev->pcie_cap) >>+ return -ENOTTY; > > >Can pnv_eeh_do_flr() be really called on a non PCIe device, can we get that >far? WARN_ON_ONCE() may be? > So you suggest to add a WARN_ON_ONCE() in this condition, right? > >>+ >>+ eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP, 4, ®); > > >... and here about uninitialized @reg. > > >>+ if (!(reg & PCI_EXP_DEVCAP_FLR)) >>+ return -ENOTTY; >>+ >>+ switch (option) { >>+ case EEH_RESET_HOT: >>+ case EEH_RESET_FUNDAMENTAL: >>+ pnv_eeh_wait_for_pending(pdn, edev->pcie_cap + PCI_EXP_DEVSTA, >>+ PCI_EXP_DEVSTA_TRPND, false); >>+ eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, >>+ 4, ®); >>+ reg |= PCI_EXP_DEVCTL_BCR_FLR; >>+ eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, >>+ 4, reg); >>+ msleep(EEH_PE_RST_HOLD_TIME); >>+ break; >>+ case EEH_RESET_DEACTIVATE: >>+ eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, >>+ 4, ®); >>+ reg &= ~PCI_EXP_DEVCTL_BCR_FLR; >>+ eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, >>+ 4, reg); >>+ msleep(EEH_PE_RST_SETTLE_TIME); >>+ break; >>+ } >>+ >>+ return 0; >>+} >>+ >>+static int pnv_eeh_do_af_flr(struct pci_dn *pdn, int option) >>+{ >>+ struct eeh_dev *edev = pdn_to_eeh_dev(pdn); >>+ u32 cap; >>+ >>+ if (!edev->af_cap) >>+ return -ENOTTY; >>+ >>+ eeh_ops->read_config(pdn, edev->af_cap + PCI_AF_CAP, 1, &cap); > > >... and here about @cap. > >>+ if (!(cap & PCI_AF_CAP_TP) || !(cap & PCI_AF_CAP_FLR)) >>+ return -ENOTTY; >>+ >>+ switch (option) { >>+ case EEH_RESET_HOT: >>+ case EEH_RESET_FUNDAMENTAL: >>+ /* >>+ * Wait for Transaction Pending bit to clear. A word-aligned >>+ * test is used, so we use the conrol offset rather than status >>+ * and shift the test bit to match. > > >Why word-aligned (not byte or double word)? > I copied this words from pci_af_flr(). Actually, I don't tried to understand this reason. >>+ */ >>+ pnv_eeh_wait_for_pending(pdn, edev->af_cap + PCI_AF_CTRL, >>+ PCI_AF_STATUS_TP << 8, true); >>+ eeh_ops->write_config(pdn, edev->af_cap + PCI_AF_CTRL, >>+ 1, PCI_AF_CTRL_FLR); >>+ msleep(EEH_PE_RST_HOLD_TIME); >>+ break; >>+ case EEH_RESET_DEACTIVATE: >>+ eeh_ops->write_config(pdn, edev->af_cap + PCI_AF_CTRL, 1, 0); >>+ msleep(EEH_PE_RST_SETTLE_TIME); > > >btw there is an unrelated issue with EEH_PE_RST_SETTLE_TIME which is defined >as 1800 which is A LOT (+250ms from EEH_PE_RST_HOLD_TIME and for some reason >this is actually doubled so there is another reset somewhere). > I don't know the reason for this value. This code keeps aligned with other reset functions, like pnv_eeh_bridge_reset(). >Booting a guest with 63 VFs takes 6 minutes or so, is there a good reason for >such a huge timeout? > > >>+ break; >>+ } >>+ >>+ return 0; >>+} >>+ >>+static int pnv_eeh_reset_vf(struct pci_dn *pdn, int option) >>+{ >>+ int ret; >>+ >>+ ret = pnv_eeh_do_flr(pdn, option); >>+ if (ret != -ENOTTY) >>+ return ret; >>+ >>+ return pnv_eeh_do_af_flr(pdn, option); >>+} >>+ >>+static int pnv_eeh_vf_pe_reset(struct eeh_pe *pe, int option) >>+{ >>+ struct eeh_dev *edev, *tmp; >>+ struct pci_dn *pdn; >>+ int ret; >>+ >>+ eeh_pe_for_each_dev(pe, edev, tmp) { >>+ pdn = eeh_dev_to_pdn(edev); >>+ ret = pnv_eeh_reset_vf(pdn, option); >>+ if (ret) >>+ return ret; >>+ } >>+ >>+ return 0; >>+} >>+ >> void pnv_pci_reset_secondary_bus(struct pci_dev *dev) >> { >> struct pci_controller *hose; >>@@ -968,7 +1090,9 @@ static int pnv_eeh_reset(struct eeh_pe *pe, int option) >> } >> >> bus = eeh_pe_bus_get(pe); >>- if (pci_is_root_bus(bus) || >>+ if (pe->type & EEH_PE_VF) >>+ ret = pnv_eeh_vf_pe_reset(pe, option); >>+ else if (pci_is_root_bus(bus) || >> pci_is_root_bus(bus->parent)) >> ret = pnv_eeh_root_reset(hose, option); >> else >>@@ -1108,6 +1232,14 @@ static inline bool pnv_eeh_cfg_blocked(struct pci_dn *pdn) >> if (!edev || !edev->pe) >> return false; >> >>+ /* >>+ * We will issue FLR or AF FLR to all VFs, which are contained >>+ * in VF PE. It relies on the EEH PCI config accessors. So we >>+ * can't block them during the window. >>+ */ >>+ if ((edev->physfn) && (edev->pe->state & EEH_PE_RESET)) > > >Extra braces around edev->physfn. > Will remove it. > > >>+ return false; >>+ >> if (edev->pe->state & EEH_PE_CFG_BLOCKED) >> return true; >> >> > > >-- >Alexey -- Richard Yang Help you, Help me -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html