Re: [PATCH V4: 1/3] pci: Provide Multiple Error Received and no error source id support on AER

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, 2009-06-12 at 11:08 +0800, Zhang, Yanmin wrote:
> Anyone could help me test it on powerpc? Or at least a compilation
> to see there is any compiling error/warning. I have no powerpc machine.
> Thanks.
> 
> Changelog V4:
> 	Port the patches to Jesses' linux-next tree (mostly based on
> 	2.6.30).
> 
> Changelog V3:
>         1) Probe devices under the root port when the bus id of
>         the source id is equal to 0; V2 does so when device id is
>         equal to 0;
>         2) Add more comments on critical path of finding devices.
> 
> Changelog V2:
>         Version 2 adds nosourceid, a boot parameter. When
>         aerdriver.nosourceid=y, aerdriver doesn't use the source
>         id saved by root port. Instead, it searches the device
>         tree under the root port to find the reporter. So if hardware
>         has errata and root port saves a bad source id, aerdriver
>         still could find the reporter.
>         There are 2 scenarios under which aerdriver searches the
>         device tree under root port:
>         1) nosourceid=n and error source id is equal to 0;
>         2) nosourceid=y.
> 
> Based on PCI Express AER specs, a root port might receive multiple
> TLP errors while it could only save a correctable error source id
> and an uncorrectable error source id at the same time. In addition,
> some root port hardware might be unable to provide a correct source
> id, i.e., the source id, or the bus id part of the source id provided
> by root port might be equal to 0.
> 
> The patchset implements the support in kernel by searching the device
> tree under the root port.
> 
> Patch 1 changes parameter cb of function pci_walk_bus to return a value.
> When cb return non-zero, pci_walk_bus stops more searching on the
> device tree.
> 
> Signed-off-by: Zhang Yanmin <yanmin.zhang@xxxxxxxxxxxxxxx>
> 

This one looks fine to me. 

Reviewed-by: Andrew Patterson <andrew.patterson@xxxxxx>

> ---
> 
> diff -Nraup linux-2.6_next/arch/powerpc/platforms/pseries/eeh_driver.c linux-2.6_next_pciwalk/arch/powerpc/platforms/pseries/eeh_driver.c
> --- linux-2.6_next/arch/powerpc/platforms/pseries/eeh_driver.c	2009-06-12 05:25:14.000000000 +0800
> +++ linux-2.6_next_pciwalk/arch/powerpc/platforms/pseries/eeh_driver.c	2009-06-12 05:28:08.000000000 +0800
> @@ -122,7 +122,7 @@ static void eeh_enable_irq(struct pci_de
>   * passed back in "userdata".
>   */
>  
> -static void eeh_report_error(struct pci_dev *dev, void *userdata)
> +static int eeh_report_error(struct pci_dev *dev, void *userdata)
>  {
>  	enum pci_ers_result rc, *res = userdata;
>  	struct pci_driver *driver = dev->driver;
> @@ -130,19 +130,21 @@ static void eeh_report_error(struct pci_
>  	dev->error_state = pci_channel_io_frozen;
>  
>  	if (!driver)
> -		return;
> +		return 0;
>  
>  	eeh_disable_irq(dev);
>  
>  	if (!driver->err_handler ||
>  	    !driver->err_handler->error_detected)
> -		return;
> +		return 0;
>  
>  	rc = driver->err_handler->error_detected (dev, pci_channel_io_frozen);
>  
>  	/* A driver that needs a reset trumps all others */
>  	if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
>  	if (*res == PCI_ERS_RESULT_NONE) *res = rc;
> +
> +	return 0;
>  }
>  
>  /**
> @@ -153,7 +155,7 @@ static void eeh_report_error(struct pci_
>   * Cumulative response passed back in "userdata".
>   */
>  
> -static void eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata)
> +static int eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata)
>  {
>  	enum pci_ers_result rc, *res = userdata;
>  	struct pci_driver *driver = dev->driver;
> @@ -161,26 +163,28 @@ static void eeh_report_mmio_enabled(stru
>  	if (!driver ||
>  	    !driver->err_handler ||
>  	    !driver->err_handler->mmio_enabled)
> -		return;
> +		return 0;
>  
>  	rc = driver->err_handler->mmio_enabled (dev);
>  
>  	/* A driver that needs a reset trumps all others */
>  	if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
>  	if (*res == PCI_ERS_RESULT_NONE) *res = rc;
> +
> +	return 0;
>  }
>  
>  /**
>   * eeh_report_reset - tell device that slot has been reset
>   */
>  
> -static void eeh_report_reset(struct pci_dev *dev, void *userdata)
> +static int eeh_report_reset(struct pci_dev *dev, void *userdata)
>  {
>  	enum pci_ers_result rc, *res = userdata;
>  	struct pci_driver *driver = dev->driver;
>  
>  	if (!driver)
> -		return;
> +		return 0;
>  
>  	dev->error_state = pci_channel_io_normal;
>  
> @@ -188,35 +192,39 @@ static void eeh_report_reset(struct pci_
>  
>  	if (!driver->err_handler ||
>  	    !driver->err_handler->slot_reset)
> -		return;
> +		return 0;
>  
>  	rc = driver->err_handler->slot_reset(dev);
>  	if ((*res == PCI_ERS_RESULT_NONE) ||
>  	    (*res == PCI_ERS_RESULT_RECOVERED)) *res = rc;
>  	if (*res == PCI_ERS_RESULT_DISCONNECT &&
>  	     rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
> +
> +	return 0;
>  }
>  
>  /**
>   * eeh_report_resume - tell device to resume normal operations
>   */
>  
> -static void eeh_report_resume(struct pci_dev *dev, void *userdata)
> +static int eeh_report_resume(struct pci_dev *dev, void *userdata)
>  {
>  	struct pci_driver *driver = dev->driver;
>  
>  	dev->error_state = pci_channel_io_normal;
>  
>  	if (!driver)
> -		return;
> +		return 0;
>  
>  	eeh_enable_irq(dev);
>  
>  	if (!driver->err_handler ||
>  	    !driver->err_handler->resume)
> -		return;
> +		return 0;
>  
>  	driver->err_handler->resume(dev);
> +
> +	return 0;
>  }
>  
>  /**
> @@ -226,22 +234,24 @@ static void eeh_report_resume(struct pci
>   * dead, and that no further recovery attempts will be made on it.
>   */
>  
> -static void eeh_report_failure(struct pci_dev *dev, void *userdata)
> +static int eeh_report_failure(struct pci_dev *dev, void *userdata)
>  {
>  	struct pci_driver *driver = dev->driver;
>  
>  	dev->error_state = pci_channel_io_perm_failure;
>  
>  	if (!driver)
> -		return;
> +		return 0;
>  
>  	eeh_disable_irq(dev);
>  
>  	if (!driver->err_handler ||
>  	    !driver->err_handler->error_detected)
> -		return;
> +		return 0;
>  
>  	driver->err_handler->error_detected(dev, pci_channel_io_perm_failure);
> +
> +	return 0;
>  }
>  
>  /* ------------------------------------------------------- */
> diff -Nraup linux-2.6_next/drivers/pci/bus.c linux-2.6_next_pciwalk/drivers/pci/bus.c
> --- linux-2.6_next/drivers/pci/bus.c	2009-06-12 05:25:43.000000000 +0800
> +++ linux-2.6_next_pciwalk/drivers/pci/bus.c	2009-06-12 05:28:08.000000000 +0800
> @@ -206,13 +206,18 @@ void pci_enable_bridges(struct pci_bus *
>   *  Walk the given bus, including any bridged devices
>   *  on buses under this bus.  Call the provided callback
>   *  on each device found.
> + *
> + *  We check the return of @cb each time. If it returns anything
> + *  other than 0, we break out.
> + *
>   */
> -void pci_walk_bus(struct pci_bus *top, void (*cb)(struct pci_dev *, void *),
> +void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
>  		  void *userdata)
>  {
>  	struct pci_dev *dev;
>  	struct pci_bus *bus;
>  	struct list_head *next;
> +	int retval;
>  
>  	bus = top;
>  	down_read(&pci_bus_sem);
> @@ -236,8 +241,10 @@ void pci_walk_bus(struct pci_bus *top, v
>  
>  		/* Run device routines with the device locked */
>  		down(&dev->dev.sem);
> -		cb(dev, userdata);
> +		retval = cb(dev, userdata);
>  		up(&dev->dev.sem);
> +		if (retval)
> +			break;
>  	}
>  	up_read(&pci_bus_sem);
>  }
> diff -Nraup linux-2.6_next/drivers/pci/pcie/aer/aerdrv_core.c linux-2.6_next_pciwalk/drivers/pci/pcie/aer/aerdrv_core.c
> --- linux-2.6_next/drivers/pci/pcie/aer/aerdrv_core.c	2009-06-12 05:25:43.000000000 +0800
> +++ linux-2.6_next_pciwalk/drivers/pci/pcie/aer/aerdrv_core.c	2009-06-12 05:31:53.000000000 +0800
> @@ -109,7 +109,7 @@ int pci_cleanup_aer_correct_error_status
>  #endif  /*  0  */
>  
> 
> -static void set_device_error_reporting(struct pci_dev *dev, void *data)
> +static int set_device_error_reporting(struct pci_dev *dev, void *data)
>  {
>  	bool enable = *((bool *)data);
>  
> @@ -124,6 +124,8 @@ static void set_device_error_reporting(s
>  
>  	if (enable)
>  		pcie_set_ecrc_checking(dev);
> +
> +	return 0;
>  }
>  
>  /**
> @@ -207,7 +209,7 @@ static struct device* find_source_device
>  	return NULL;
>  }
>  
> -static void report_error_detected(struct pci_dev *dev, void *data)
> +static int report_error_detected(struct pci_dev *dev, void *data)
>  {
>  	pci_ers_result_t vote;
>  	struct pci_error_handlers *err_handler;
> @@ -232,16 +234,16 @@ static void report_error_detected(struct
>  				   dev->driver ?
>  				   "no AER-aware driver" : "no driver");
>  		}
> -		return;
> +		return 0;
>  	}
>  
>  	err_handler = dev->driver->err_handler;
>  	vote = err_handler->error_detected(dev, result_data->state);
>  	result_data->result = merge_result(result_data->result, vote);
> -	return;
> +	return 0;
>  }
>  
> -static void report_mmio_enabled(struct pci_dev *dev, void *data)
> +static int report_mmio_enabled(struct pci_dev *dev, void *data)
>  {
>  	pci_ers_result_t vote;
>  	struct pci_error_handlers *err_handler;
> @@ -251,15 +253,15 @@ static void report_mmio_enabled(struct p
>  	if (!dev->driver ||
>  		!dev->driver->err_handler ||
>  		!dev->driver->err_handler->mmio_enabled)
> -		return;
> +		return 0;
>  
>  	err_handler = dev->driver->err_handler;
>  	vote = err_handler->mmio_enabled(dev);
>  	result_data->result = merge_result(result_data->result, vote);
> -	return;
> +	return 0;
>  }
>  
> -static void report_slot_reset(struct pci_dev *dev, void *data)
> +static int report_slot_reset(struct pci_dev *dev, void *data)
>  {
>  	pci_ers_result_t vote;
>  	struct pci_error_handlers *err_handler;
> @@ -269,15 +271,15 @@ static void report_slot_reset(struct pci
>  	if (!dev->driver ||
>  		!dev->driver->err_handler ||
>  		!dev->driver->err_handler->slot_reset)
> -		return;
> +		return 0;
>  
>  	err_handler = dev->driver->err_handler;
>  	vote = err_handler->slot_reset(dev);
>  	result_data->result = merge_result(result_data->result, vote);
> -	return;
> +	return 0;
>  }
>  
> -static void report_resume(struct pci_dev *dev, void *data)
> +static int report_resume(struct pci_dev *dev, void *data)
>  {
>  	struct pci_error_handlers *err_handler;
>  
> @@ -286,11 +288,11 @@ static void report_resume(struct pci_dev
>  	if (!dev->driver ||
>  		!dev->driver->err_handler ||
>  		!dev->driver->err_handler->resume)
> -		return;
> +		return 0;
>  
>  	err_handler = dev->driver->err_handler;
>  	err_handler->resume(dev);
> -	return;
> +	return 0;
>  }
>  
>  /**
> @@ -307,7 +309,7 @@ static void report_resume(struct pci_dev
>  static pci_ers_result_t broadcast_error_message(struct pci_dev *dev,
>  	enum pci_channel_state state,
>  	char *error_mesg,
> -	void (*cb)(struct pci_dev *, void *))
> +	int (*cb)(struct pci_dev *, void *))
>  {
>  	struct aer_broadcast_data result_data;
>  
> diff -Nraup linux-2.6_next/include/linux/pci.h linux-2.6_next_pciwalk/include/linux/pci.h
> --- linux-2.6_next/include/linux/pci.h	2009-06-12 05:24:32.000000000 +0800
> +++ linux-2.6_next_pciwalk/include/linux/pci.h	2009-06-12 05:28:08.000000000 +0800
> @@ -789,7 +789,7 @@ const struct pci_device_id *pci_match_id
>  int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max,
>  		    int pass);
>  
> -void pci_walk_bus(struct pci_bus *top, void (*cb)(struct pci_dev *, void *),
> +void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
>  		  void *userdata);
>  int pci_cfg_space_size_ext(struct pci_dev *dev);
>  int pci_cfg_space_size(struct pci_dev *dev);
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
-- 
Andrew Patterson
Hewlett-Packard

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [DMA Engine]     [Linux Coverity]     [Linux USB]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [Greybus]

  Powered by Linux