Re: [PATCH v4 09/21] powerpc/powernv: Use PCI slot reset infrastructure

Gavin Shan <gwshan@xxxxxxxxxxxxxxxxxx> · Mon, 11 May 2015 16:45:47 +1000

On Sat, May 09, 2015 at 11:41:05PM +1000, Alexey Kardashevskiy wrote:
>On 05/01/2015 04:02 PM, Gavin Shan wrote:
>>For PowerNV platform, running on top of skiboot, all PE level reset
>>should be routed to firmware if the bridge of the PE primary bus has
>>device-node property "ibm,reset-by-firmware". Otherwise, the kernel
>>has to issue hot reset on PE's primary bus despite the requested reset
>>types, which is the behaviour before the firmware supports PCI slot
>>reset. So the changes don't depend on the PCI slot reset capability
>>exposed from the firmware.
>>
>>Signed-off-by: Gavin Shan <gwshan@xxxxxxxxxxxxxxxxxx>
>>---
>>  arch/powerpc/include/asm/eeh.h               |   1 +
>>  arch/powerpc/include/asm/opal.h              |   4 +-
>>  arch/powerpc/platforms/powernv/eeh-powernv.c | 206 +++++++++++++--------------
>>  3 files changed, 102 insertions(+), 109 deletions(-)
>>
>>diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
>>index c5eb86f..2793d24 100644
>>--- a/arch/powerpc/include/asm/eeh.h
>>+++ b/arch/powerpc/include/asm/eeh.h
>>@@ -190,6 +190,7 @@ enum {
>>  #define EEH_RESET_DEACTIVATE	0	/* Deactivate the PE reset	*/
>>  #define EEH_RESET_HOT		1	/* Hot reset			*/
>>  #define EEH_RESET_FUNDAMENTAL	3	/* Fundamental reset		*/
>>+#define EEH_RESET_COMPLETE	4	/* PHB complete reset           */
>>  #define EEH_LOG_TEMP		1	/* EEH temporary error log	*/
>>  #define EEH_LOG_PERM		2	/* EEH permanent error log	*/
>>
>>diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
>>index 042af1a..6d467df 100644
>>--- a/arch/powerpc/include/asm/opal.h
>>+++ b/arch/powerpc/include/asm/opal.h
>>@@ -129,7 +129,7 @@ int64_t opal_pci_map_pe_dma_window(uint64_t phb_id, uint16_t pe_number, uint16_t
>>  int64_t opal_pci_map_pe_dma_window_real(uint64_t phb_id, uint16_t pe_number,
>>  					uint16_t dma_window_number, uint64_t pci_start_addr,
>>  					uint64_t pci_mem_size);
>>-int64_t opal_pci_reset(uint64_t phb_id, uint8_t reset_scope, uint8_t assert_state);
>>+int64_t opal_pci_reset(uint64_t id, uint8_t reset_scope, uint8_t assert_state);
>>
>>  int64_t opal_pci_get_hub_diag_data(uint64_t hub_id, void *diag_buffer,
>>  				   uint64_t diag_buffer_len);
>>@@ -145,7 +145,7 @@ int64_t opal_get_epow_status(__be64 *status);
>>  int64_t opal_set_system_attention_led(uint8_t led_action);
>>  int64_t opal_pci_next_error(uint64_t phb_id, __be64 *first_frozen_pe,
>>  			    __be16 *pci_error_type, __be16 *severity);
>>-int64_t opal_pci_poll(uint64_t phb_id);
>>+int64_t opal_pci_poll(uint64_t id, uint8_t *val);
>>  int64_t opal_return_cpu(void);
>>  int64_t opal_check_token(uint64_t token);
>>  int64_t opal_reinit_cpus(uint64_t flags);
>>diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
>>index ce738ab..3c01095 100644
>>--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
>>+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
>>@@ -742,12 +742,12 @@ static int pnv_eeh_get_state(struct eeh_pe *pe, int *delay)
>>  	return ret;
>>  }
>>
>>-static s64 pnv_eeh_phb_poll(struct pnv_phb *phb)
>>+static s64 pnv_eeh_poll(uint64_t id)
>>  {
>>  	s64 rc = OPAL_HARDWARE;
>>
>>  	while (1) {
>>-		rc = opal_pci_poll(phb->opal_id);
>>+		rc = opal_pci_poll(id, NULL);
>>  		if (rc <= 0)
>>  			break;
>>
>>@@ -763,84 +763,38 @@ static s64 pnv_eeh_phb_poll(struct pnv_phb *phb)
>>  int pnv_eeh_phb_reset(struct pci_controller *hose, int option)
>>  {
>>  	struct pnv_phb *phb = hose->private_data;
>>+	uint8_t scope;
>>  	s64 rc = OPAL_HARDWARE;
>>
>>  	pr_debug("%s: Reset PHB#%x, option=%d\n",
>>  		 __func__, hose->global_number, option);
>>-
>>-	/* Issue PHB complete reset request */
>>-	if (option == EEH_RESET_FUNDAMENTAL ||
>>-	    option == EEH_RESET_HOT)
>>-		rc = opal_pci_reset(phb->opal_id,
>>-				    OPAL_RESET_PHB_COMPLETE,
>>-				    OPAL_ASSERT_RESET);
>>-	else if (option == EEH_RESET_DEACTIVATE)
>>-		rc = opal_pci_reset(phb->opal_id,
>>-				    OPAL_RESET_PHB_COMPLETE,
>>-				    OPAL_DEASSERT_RESET);
>>-	if (rc < 0)
>>-		goto out;
>>-
>>-	/*
>>-	 * Poll state of the PHB until the request is done
>>-	 * successfully. The PHB reset is usually PHB complete
>>-	 * reset followed by hot reset on root bus. So we also
>>-	 * need the PCI bus settlement delay.
>>-	 */
>>-	rc = pnv_eeh_phb_poll(phb);
>>-	if (option == EEH_RESET_DEACTIVATE) {
>>-		if (system_state < SYSTEM_RUNNING)
>>-			udelay(1000 * EEH_PE_RST_SETTLE_TIME);
>>-		else
>>-			msleep(EEH_PE_RST_SETTLE_TIME);
>
>
>These udelay() and msleep() are gone. How come they are not needed anymore?
>Worth commenting in the commit log or remove those in a separate patch.
>
>I just remember you mentioning some missing delays somewhere which caused
>NVIDIA device to issue EEH and I do not want those to disappear :)
>

Yeah, I think you're correct that it's not safe to remove this yet because
the old firmware (without corresponding PCI hotplug changes) doesn't have
the required delays from opal_pci_poll() yet. I'll add this in next revision.

>
>>+	switch (option) {
>>+	case EEH_RESET_HOT:
>>+		scope = OPAL_RESET_PCI_HOT;
>>+		break;
>>+	case EEH_RESET_FUNDAMENTAL:
>>+		scope = OPAL_RESET_PCI_FUNDAMENTAL;
>>+		break;
>>+	case EEH_RESET_COMPLETE:
>>+		scope = OPAL_RESET_PHB_COMPLETE;
>>+		break;
>>+	case EEH_RESET_DEACTIVATE:
>>+		return 0;
>>+	default:
>>+		pr_warn("%s: Unsupported option %d\n",
>>+			__func__, option);
>>+		return -EINVAL;
>>  	}
>>-out:
>>-	if (rc != OPAL_SUCCESS)
>>-		return -EIO;
>>
>>-	return 0;
>>-}
>>-
>>-static int pnv_eeh_root_reset(struct pci_controller *hose, int option)
>>-{
>>-	struct pnv_phb *phb = hose->private_data;
>>-	s64 rc = OPAL_HARDWARE;
>>+	/* Issue reset and poll until it's completed */
>>+	rc = opal_pci_reset(phb->opal_id, scope, OPAL_ASSERT_RESET);
>>+	if (rc > 0)
>>+		rc = pnv_eeh_poll(phb->opal_id);
>>
>>-	pr_debug("%s: Reset PHB#%x, option=%d\n",
>>-		 __func__, hose->global_number, option);
>>-
>>-	/*
>>-	 * During the reset deassert time, we needn't care
>>-	 * the reset scope because the firmware does nothing
>>-	 * for fundamental or hot reset during deassert phase.
>>-	 */
>>-	if (option == EEH_RESET_FUNDAMENTAL)
>>-		rc = opal_pci_reset(phb->opal_id,
>>-				    OPAL_RESET_PCI_FUNDAMENTAL,
>>-				    OPAL_ASSERT_RESET);
>>-	else if (option == EEH_RESET_HOT)
>>-		rc = opal_pci_reset(phb->opal_id,
>>-				    OPAL_RESET_PCI_HOT,
>>-				    OPAL_ASSERT_RESET);
>>-	else if (option == EEH_RESET_DEACTIVATE)
>>-		rc = opal_pci_reset(phb->opal_id,
>>-				    OPAL_RESET_PCI_HOT,
>>-				    OPAL_DEASSERT_RESET);
>>-	if (rc < 0)
>>-		goto out;
>>-
>>-	/* Poll state of the PHB until the request is done */
>>-	rc = pnv_eeh_phb_poll(phb);
>>-	if (option == EEH_RESET_DEACTIVATE)
>>-		msleep(EEH_PE_RST_SETTLE_TIME);
>>-out:
>>-	if (rc != OPAL_SUCCESS)
>>-		return -EIO;
>>-
>>-	return 0;
>>+	return (rc == OPAL_SUCCESS) ? 0 : -EIO;
>>  }
>>
>>-static int pnv_eeh_bridge_reset(struct pci_dev *dev, int option)
>>+static int __pnv_eeh_bridge_reset(struct pci_dev *dev, int option)
>>  {
>>  	struct pci_dn *pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
>>  	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
>>@@ -891,14 +845,57 @@ static int pnv_eeh_bridge_reset(struct pci_dev *dev, int option)
>>  	return 0;
>>  }
>>
>>+static int pnv_eeh_bridge_reset(struct pci_dev *dev, int option)
>>+{
>>+	struct pci_controller *hose;
>>+	struct pnv_phb *phb;
>>+	struct device_node *dn = dev ? pci_device_to_OF_node(dev) : NULL;
>>+	uint64_t id = (0x1ul << 60);
>>+	uint8_t scope;
>>+	s64 rc;
>
>
>int64_t for @rc?
>
>

Yes.

>>+
>>+	/*
>>+	 * If the firmware can't handle it, we will issue hot reset
>>+	 * on the secondary bus despite the requested reset type
>>+	 */
>>+	if (!dn || !of_get_property(dn, "ibm,reset-by-firmware", NULL))
>>+		return __pnv_eeh_bridge_reset(dev, option);
>>+
>>+	/* The firmware can handle the request */
>>+	switch (option) {
>>+	case EEH_RESET_HOT:
>>+		scope = OPAL_RESET_PCI_HOT;
>>+		break;
>>+	case EEH_RESET_FUNDAMENTAL:
>>+		scope = OPAL_RESET_PCI_FUNDAMENTAL;
>>+		break;
>>+	case EEH_RESET_DEACTIVATE:
>>+		return 0;
>>+	case EEH_RESET_COMPLETE:
>>+	default:
>>+		pr_warn("%s: Unsupported option %d on device %s\n",
>>+			__func__, option, pci_name(dev));
>>+		return -EINVAL;
>>+	}
>
>
>This is the same switch as earlier in this patch (slightly different order).
>Move it and opal_pci_reset() into a helper and call it pnv_opal_pci_reset()?
>
>

It sounds a good idea. I'll do accordingly.

>>+
>>+	hose = pci_bus_to_host(dev->bus);
>>+	phb = hose->private_data;
>
>Previously you would initialize @hose and @phb where you declared those but
>not here. If you did the same thing as before, the patch could have been
>smaller and easier to read.
>

Sure.

>>+	id |= (dev->bus->number << 24) | (dev->devfn << 16) | phb->opal_id;
>>+	rc = opal_pci_reset(id, scope, OPAL_ASSERT_RESET);
>>+	if (rc > 0)
>>+		rc = pnv_eeh_poll(id);
>>+
>>+	return (rc == OPAL_SUCCESS) ? 0 : -EIO;
>>+}
>>+
>>  void pnv_pci_reset_secondary_bus(struct pci_dev *dev)
>>  {
>>  	struct pci_controller *hose;
>>
>>  	if (pci_is_root_bus(dev->bus)) {
>>  		hose = pci_bus_to_host(dev->bus);
>>-		pnv_eeh_root_reset(hose, EEH_RESET_HOT);
>>-		pnv_eeh_root_reset(hose, EEH_RESET_DEACTIVATE);
>>+		pnv_eeh_phb_reset(hose, EEH_RESET_HOT);
>>+		pnv_eeh_phb_reset(hose, EEH_RESET_DEACTIVATE);
>>  	} else {
>>  		pnv_eeh_bridge_reset(dev, EEH_RESET_HOT);
>>  		pnv_eeh_bridge_reset(dev, EEH_RESET_DEACTIVATE);
>>@@ -920,8 +917,9 @@ void pnv_pci_reset_secondary_bus(struct pci_dev *dev)
>>  static int pnv_eeh_reset(struct eeh_pe *pe, int option)
>>  {
>>  	struct pci_controller *hose = pe->phb;
>>+	struct pnv_phb *phb;
>>  	struct pci_bus *bus;
>>-	int ret;
>>+	s64 rc;
>>
>>  	/*
>>  	 * For PHB reset, we always have complete reset. For those PEs whose
>>@@ -937,43 +935,37 @@ static int pnv_eeh_reset(struct eeh_pe *pe, int option)
>>  	 * reset. The side effect is that EEH core has to clear the frozen
>>  	 * state explicitly after BAR restore.
>>  	 */
>>-	if (pe->type & EEH_PE_PHB) {
>>-		ret = pnv_eeh_phb_reset(hose, option);
>>-	} else {
>>-		struct pnv_phb *phb;
>>-		s64 rc;
>>+	if (pe->type & EEH_PE_PHB)
>
>I would keep "{" in the line above ....
>
>>+		return pnv_eeh_phb_reset(hose, EEH_RESET_COMPLETE);
>
>...put "} else {" here...
>
>and the chunk below would become 1) very small 2) very trivial... And then
>you could make a trivial patch which would do scope removal but without
>functional changes. Or vice versa.
>

I intended to remove nested if(). If you really want me to change the code
according to your comments, I'll do. Otherwise, I prefer to keep it as
of being.

>>
>>-		/*
>>-		 * The frozen PE might be caused by PAPR error injection
>>-		 * registers, which are expected to be cleared after hitting
>>-		 * frozen PE as stated in the hardware spec. Unfortunately,
>>-		 * that's not true on P7IOC. So we have to clear it manually
>>-		 * to avoid recursive EEH errors during recovery.
>>-		 */
>>-		phb = hose->private_data;
>>-		if (phb->model == PNV_PHB_MODEL_P7IOC &&
>>-		    (option == EEH_RESET_HOT ||
>>-		    option == EEH_RESET_FUNDAMENTAL)) {
>>-			rc = opal_pci_reset(phb->opal_id,
>>-					    OPAL_RESET_PHB_ERROR,
>>-					    OPAL_ASSERT_RESET);
>>-			if (rc != OPAL_SUCCESS) {
>>-				pr_warn("%s: Failure %lld clearing "
>>-					"error injection registers\n",
>>-					__func__, rc);
>>-				return -EIO;
>>-			}
>>+	/*
>>+	 * The frozen PE might be caused by PAPR error injection
>>+	 * registers, which are expected to be cleared after hitting
>>+	 * frozen PE as stated in the hardware spec. Unfortunately,
>>+	 * that's not true on P7IOC. So we have to clear it manually
>>+	 * to avoid recursive EEH errors during recovery.
>>+	 */
>>+	phb = hose->private_data;
>>+	if (phb->model == PNV_PHB_MODEL_P7IOC &&
>>+	    (option == EEH_RESET_HOT ||
>>+	    option == EEH_RESET_FUNDAMENTAL)) {
>>+		rc = opal_pci_reset(phb->opal_id,
>>+				    OPAL_RESET_PHB_ERROR,
>>+				    OPAL_ASSERT_RESET);
>>+		if (rc != OPAL_SUCCESS) {
>>+			pr_warn("%s: Failure %lld clearing error "
>>+				"injection registers on PHB#%d\n",
>>+				__func__, rc, hose->global_number);
>>+			return -EIO;
>>  		}
>>-
>>-		bus = eeh_pe_bus_get(pe);
>>-		if (pci_is_root_bus(bus) ||
>>-			pci_is_root_bus(bus->parent))
>>-			ret = pnv_eeh_root_reset(hose, option);
>>-		else
>>-			ret = pnv_eeh_bridge_reset(bus->self, option);
>>  	}
>>
>>-	return ret;
>>+	/* Route the reset request to PHB or upstream bridge */
>>+	bus = eeh_pe_bus_get(pe);
>>+	if (pci_is_root_bus(bus))
>>+		return pnv_eeh_phb_reset(hose, option);
>>+
>>+	return pnv_eeh_bridge_reset(bus->self, option);
>>  }
>>
>>  /**
>>

Thanks,
Gavin

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html