On Fri, 2009-08-21 at 12:48 +0900, Hidetoshi Seto wrote: > ERR_{,UN}CORRECTABLE_ERROR_MASK are the sets of error bits that Linux knows, > i.e. the sets of PCI_ERR_COR_* and PCI_ERR_UNC_* defined in linux/pci_regs.h. > These masks make aerdrv not report errors of unknown bits, even though aerdrv > has the ability to report such undefined errors as "Unknown Error Bit %2d". > > OTOH aerdrv_errprint does not check the setting in the mask register at all. > So it could wrongly report a masked error by finding the bit in status without > knowing that the bit is masked in the mask register. > > This patch changes aerdrv to properly use the mask state in the mask register > instead of the defined/hardcoded ERR_{,UN}CORRECTABLE_ERROR_MASK. > This change prevents aerdrv from reporting masked errors, and also enables > reporting unknown errors. > > Signed-off-by: Hidetoshi Seto <seto.hidetoshi@xxxxxxxxxxxxxx> > --- > drivers/pci/pcie/aer/aerdrv.h | 4 +--- > drivers/pci/pcie/aer/aerdrv_core.c | 28 ++++++++++++---------------- > drivers/pci/pcie/aer/aerdrv_errprint.c | 6 ++++-- > 3 files changed, 17 insertions(+), 21 deletions(-) > > diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h > index 820ea73..0db530d 100644 > --- a/drivers/pci/pcie/aer/aerdrv.h > +++ b/drivers/pci/pcie/aer/aerdrv.h > @@ -47,9 +47,6 @@ > #define AER_TLP_HEADER_VALID_FLAG 0x00000001 > #define AER_MULTI_ERROR_VALID_FLAG 0x00000002 > > -#define ERR_CORRECTABLE_ERROR_MASK 0x000031c1 > -#define ERR_UNCORRECTABLE_ERROR_MASK 0x001ff010 > - > struct header_log_regs { > unsigned int dw0; > unsigned int dw1; > @@ -65,6 +62,7 @@ struct aer_err_info { > int severity; /* 0:NONFATAL | 1:FATAL | 2:COR */ > int flags; > unsigned int status; /* COR/UNCOR Error Status */ > + unsigned int mask; /* COR/UNCOR Error Mask */ > struct header_log_regs tlp; /* TLP Header */ > }; > > diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c > index 4d67db8..38b3933 100644 > --- a/drivers/pci/pcie/aer/aerdrv_core.c > +++ 
b/drivers/pci/pcie/aer/aerdrv_core.c > @@ -236,24 +236,16 @@ static int find_device_iter(struct pci_dev *dev, void *data) > status = 0; > mask = 0; > if (e_info->severity == AER_CORRECTABLE) { > - pci_read_config_dword(dev, > - pos + PCI_ERR_COR_STATUS, > - &status); > - pci_read_config_dword(dev, > - pos + PCI_ERR_COR_MASK, > - &mask); > - if (status & ERR_CORRECTABLE_ERROR_MASK & ~mask) { > + pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, &status); > + pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK, &mask); > + if (status & ~mask) { > add_error_device(e_info, dev); > goto added; > } > } else { > - pci_read_config_dword(dev, > - pos + PCI_ERR_UNCOR_STATUS, > - &status); > - pci_read_config_dword(dev, > - pos + PCI_ERR_UNCOR_MASK, > - &mask); > - if (status & ERR_UNCORRECTABLE_ERROR_MASK & ~mask) { > + pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); > + pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, &mask); > + if (status & ~mask) { > add_error_device(e_info, dev); > goto added; > } > @@ -720,7 +712,9 @@ static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) > if (info->severity == AER_CORRECTABLE) { > pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, > &info->status); > - if (!(info->status & ERR_CORRECTABLE_ERROR_MASK)) > + pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK, > + &info->mask); > + if (!(info->status & ~info->mask)) > return AER_UNSUCCESS; > } else if (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE || > info->severity == AER_NONFATAL) { > @@ -728,7 +722,9 @@ static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) > /* Link is still healthy for IO reads */ > pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, > &info->status); > - if (!(info->status & ERR_UNCORRECTABLE_ERROR_MASK)) > + pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, > + &info->mask); > + if (!(info->status & ~info->mask)) > return AER_UNSUCCESS; > > if (info->status & AER_LOG_TLP_MASKS) { > diff 
--git a/drivers/pci/pcie/aer/aerdrv_errprint.c b/drivers/pci/pcie/aer/aerdrv_errprint.c > index 5fd1178..919c2c0 100644 > --- a/drivers/pci/pcie/aer/aerdrv_errprint.c > +++ b/drivers/pci/pcie/aer/aerdrv_errprint.c > @@ -154,11 +154,13 @@ static char *aer_agent_string[] = { > > static void aer_print_error_source(struct aer_err_info *info) > { > - int i; > + int i, status; > char *errmsg = NULL; > > + status = (info->status & ~info->mask); > + > for (i = 0; i < 32; i++) { > - if (!(info->status & (1 << i))) > + if (!(status & (1 << i))) > continue; > > if (info->severity == AER_CORRECTABLE) Reviewed-by: Andrew Patterson <andrew.patterson@xxxxxx> -- Andrew Patterson Hewlett-Packard -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html