Re: [PATCH] edac: Handle EDAC ECC errors for Family 16h

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Apr 15, 2013 at 9:17 AM, Aravind Gopalakrishnan
<Aravind.Gopalakrishnan@xxxxxxx> wrote:
> Add code to handle ECC decoding for fam16h. Support exists for
> previous families already, so code has been reused werever applicable
> and some code has been added to handle fam16h specific operations.
>
> The patch was tested on Fam16h with ECC turned on
> using the mce_amd_inj facility and works fine.
>
> Signed-off-by: Aravind Gopalakrishnan <Aravind.Gopalakrishnan@xxxxxxx>
> ---
>  arch/x86/kernel/amd_nb.c  |    4 +-
>  drivers/edac/amd64_edac.c |   96 +++++++++++++++++++++++++++++++++++++++++++--
>  drivers/edac/amd64_edac.h |   12 +++++-
>  include/linux/pci_ids.h   |    2 +
>  4 files changed, 109 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
> index 4c39baa..9df1034 100644
> --- a/arch/x86/kernel/amd_nb.c
> +++ b/arch/x86/kernel/amd_nb.c
> @@ -16,12 +16,14 @@ const struct pci_device_id amd_nb_misc_ids[] = {
>         { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
>         { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
>         { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) },
> +       { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) },
>         {}
>  };
>  EXPORT_SYMBOL(amd_nb_misc_ids);
>
>  static struct pci_device_id amd_nb_link_ids[] = {
>         { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) },
> +       { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) },
>         {}
>  };
>
> @@ -79,7 +81,7 @@ int amd_cache_northbridges(void)
>
>         /* some CPU families (e.g. family 0x11) do not support GART */
>         if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 ||
> -           boot_cpu_data.x86 == 0x15)
> +           boot_cpu_data.x86 == 0x15 || boot_cpu_data.x86 == 0x16)
>                 amd_northbridges.flags |= AMD_NB_GART;
>
>         /*
> diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
> index 9a8bebc..f43b608 100644
> --- a/drivers/edac/amd64_edac.c
> +++ b/drivers/edac/amd64_edac.c
> @@ -98,6 +98,7 @@ int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset,
>   *
>   * F15h: we select which DCT we access using F1x10C[DctCfgSel]
>   *
> + * F16h has only 1 DCT
>   */
>  static int k8_read_dct_pci_cfg(struct amd64_pvt *pvt, int addr, u32 *val,
>                                const char *func)
> @@ -133,6 +134,15 @@ static int f15_read_dct_pci_cfg(struct amd64_pvt *pvt, int addr, u32 *val,
>         return __amd64_read_pci_cfg_dword(pvt->F2, addr, val, func);
>  }
>
> +static int f16_read_dct_pci_cfg(struct amd64_pvt *pvt, int addr, u32 *val,
> +                               const char *func)
> +{
> +       if (addr >= 0x100)
> +               return -EINVAL;
> +
> +       return __amd64_read_pci_cfg_dword(pvt->F2, addr, val, func);
> +}
> +
>  /*
>   * Memory scrubber control interface. For K8, memory scrubbing is handled by
>   * hardware and can involve L2 cache, dcache as well as the main memory. With
> @@ -320,13 +330,48 @@ static void get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct,
>         u64 csbase, csmask, base_bits, mask_bits;
>         u8 addr_shift;
>
> +       /* variables specific to F16h */
> +       u64 base_bits_f16_low, base_bits_f16_high;
> +       u64 mask_bits_f16_low, mask_bits_f16_high;
> +       u8 addr_shift_f16_low, addr_shift_f16_high;
> +
>         if (boot_cpu_data.x86 == 0xf && pvt->ext_model < K8_REV_F) {
>                 csbase          = pvt->csels[dct].csbases[csrow];
>                 csmask          = pvt->csels[dct].csmasks[csrow];
>                 base_bits       = GENMASK(21, 31) | GENMASK(9, 15);
>                 mask_bits       = GENMASK(21, 29) | GENMASK(9, 15);
>                 addr_shift      = 4;
> -       } else {
> +       }
> +
> +       /*
> +       * there are two addr_shift values for F16h.
> +       * addr_shift of 8 for high bits and addr_shift of 6
> +       * for low bits. So handle it differently.
> +       */
> +       else if (boot_cpu_data.x86 == 0x16) {
> +               csbase          = pvt->csels[dct].csbases[csrow];
> +               csmask          = pvt->csels[dct].csmasks[csrow >> 1];
> +               base_bits_f16_low = mask_bits_f16_low = GENMASK(5 , 15);
> +               base_bits_f16_high = mask_bits_f16_high = GENMASK(19 , 30);
> +               addr_shift_f16_low = 6;
> +               addr_shift_f16_high = 8;
> +               *base = (((csbase & base_bits_f16_high)
> +                               << addr_shift_f16_high) |
> +                       ((csbase & base_bits_f16_low)
> +                               << addr_shift_f16_low));
> +               *mask = ~0ULL;
> +               *mask &= ~((mask_bits_f16_high
> +                               << addr_shift_f16_high) |
> +                               (mask_bits_f16_low
> +                               << addr_shift_f16_low));
> +               *mask |= (((csmask & mask_bits_f16_high)
> +                               << addr_shift_f16_high) |
> +                        ((csmask & mask_bits_f16_low)
> +                               << addr_shift_f16_low));
> +               return;
> +       }
> +
> +        else {
>                 csbase          = pvt->csels[dct].csbases[csrow];
>                 csmask          = pvt->csels[dct].csmasks[csrow >> 1];
>                 addr_shift      = 8;
> @@ -1224,6 +1269,23 @@ static int f15_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
>         return ddr3_cs_size(cs_mode, false);
>  }
>
> +/*
> + * F16h supports 64 bit DCT interfaces
> + * and has only limited cs_modes
> + */
> +static int f16_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
> +                               unsigned cs_mode)
> +{
> +       WARN_ON(cs_mode > 12);
> +
> +       if (cs_mode == 0 || cs_mode == 3 || cs_mode == 4
> +               || cs_mode == 6 || cs_mode == 8 || cs_mode == 9
> +               || cs_mode == 12)
> +               return -1;
> +       else
> +               return ddr3_cs_size(cs_mode, false);
> +}
> +
>  static void read_dram_ctl_register(struct amd64_pvt *pvt)
>  {
>
> @@ -1681,6 +1743,17 @@ static struct amd64_family_type amd64_family_types[] = {
>                         .read_dct_pci_cfg       = f15_read_dct_pci_cfg,
>                 }
>         },
> +       [F16_CPUS] = {
> +               .ctl_name = "F16h",
> +               .f1_id = PCI_DEVICE_ID_AMD_16H_NB_F1,
> +               .f3_id = PCI_DEVICE_ID_AMD_16H_NB_F3,
> +               .ops = {
> +                       .early_channel_count    = f1x_early_channel_count,
> +                       .map_sysaddr_to_csrow   = f1x_map_sysaddr_to_csrow,
> +                       .dbam_to_cs             = f16_dbam_to_chip_select,
> +                       .read_dct_pci_cfg       = f16_read_dct_pci_cfg,
> +               }
> +       },
>  };
>
>  static struct pci_dev *pci_get_related_function(unsigned int vendor,
> @@ -2071,8 +2144,12 @@ static void read_mc_regs(struct amd64_pvt *pvt)
>         pvt->ecc_sym_sz = 4;
>
>         if (c->x86 >= 0x10) {
> -               amd64_read_pci_cfg(pvt->F3, EXT_NB_MCA_CFG, &tmp);
> -               amd64_read_dct_pci_cfg(pvt, DBAM1, &pvt->dbam1);
> +
> +               /* F16h has only one DCT, so don't take the branch if f16h */
> +               if (c->x86 != 0x16) {
> +                       amd64_read_pci_cfg(pvt->F3, EXT_NB_MCA_CFG, &tmp);
> +                       amd64_read_dct_pci_cfg(pvt, DBAM1, &pvt->dbam1);
> +               }
>
>                 /* F10h, revD and later can do x8 ECC too */
>                 if ((c->x86 > 0x10 || c->x86_model > 7) && tmp & BIT(25))
> @@ -2483,6 +2560,11 @@ static struct amd64_family_type *amd64_per_family_init(struct amd64_pvt *pvt)
>                 pvt->ops                = &amd64_family_types[F15_CPUS].ops;
>                 break;
>
> +       case 0x16:
> +               fam_type                = &amd64_family_types[F16_CPUS];
> +               pvt->ops                = &amd64_family_types[F16_CPUS].ops;
> +               break;
> +
>         default:
>                 amd64_err("Unsupported family!\n");
>                 return NULL;
> @@ -2695,6 +2777,14 @@ static const struct pci_device_id amd64_pci_table[] __devinitdata = {
>                 .class          = 0,
>                 .class_mask     = 0,
>         },
> +       {
> +               .vendor         = PCI_VENDOR_ID_AMD,
> +               .device         = PCI_DEVICE_ID_AMD_16H_NB_F2,
> +               .subvendor      = PCI_ANY_ID,
> +               .subdevice      = PCI_ANY_ID,
> +               .class          = 0,
> +               .class_mask     = 0,
> +       },
>
>         {0, }
>  };
> diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
> index 9a666cb..36e8c6b 100644
> --- a/drivers/edac/amd64_edac.h
> +++ b/drivers/edac/amd64_edac.h
> @@ -36,6 +36,10 @@
>   *     Changes/Fixes by Borislav Petkov <borislav.petkov@xxxxxxx>:
>   *             - misc fixes and code cleanups
>   *
> + *     Changes by Aravind Gopalakrishnan <aravind.gopalakrishnan@xxxxxxx>
> + *             - family 16h support added. New PCI Device IDs and some code
> + *             to perform fam 16h specific ops.
> + *
>   * This module is based on the following documents
>   * (available from http://www.amd.com/):
>   *
> @@ -172,7 +176,12 @@
>   */
>  #define PCI_DEVICE_ID_AMD_15H_NB_F1    0x1601
>  #define PCI_DEVICE_ID_AMD_15H_NB_F2    0x1602
> -
> +#define PCI_DEVICE_ID_AMD_16H_NB_F0    0x1530
> +#define PCI_DEVICE_ID_AMD_16H_NB_F1    0x1531
> +#define PCI_DEVICE_ID_AMD_16H_NB_F2    0x1532
> +#define PCI_DEVICE_ID_AMD_16H_NB_F3    0x1533
> +#define PCI_DEVICE_ID_AMD_16H_NB_F4    0x1534
> +#define PCI_DEVICE_ID_AMD_16H_NB_F5    0x1535
>
>  /*
>   * Function 1 - Address Map
> @@ -300,6 +309,7 @@ enum amd_families {
>         K8_CPUS = 0,
>         F10_CPUS,
>         F15_CPUS,
> +       F16_CPUS,
>         NUM_FAMILIES,
>  };
>
> diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
> index 1679ff6..59f732f 100644
> --- a/include/linux/pci_ids.h
> +++ b/include/linux/pci_ids.h
> @@ -519,6 +519,8 @@
>  #define PCI_DEVICE_ID_AMD_11H_NB_LINK  0x1304
>  #define PCI_DEVICE_ID_AMD_15H_NB_F3    0x1603
>  #define PCI_DEVICE_ID_AMD_15H_NB_F4    0x1604
> +#define PCI_DEVICE_ID_AMD_16H_NB_F3    0x1533
> +#define PCI_DEVICE_ID_AMD_16H_NB_F4    0x1534

What is the point of adding identical #defines both here and in
amd64_edac.h?  Also, read the note at the top of
include/linux/pci_ids.h.

>  #define PCI_DEVICE_ID_AMD_CNB17H_F3    0x1703
>  #define PCI_DEVICE_ID_AMD_LANCE                0x2000
>  #define PCI_DEVICE_ID_AMD_LANCE_HOME   0x2001
> --
> 1.7.10.4
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [DMA Engine]     [Linux Coverity]     [Linux USB]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [Greybus]

  Powered by Linux