Allow various kinds of memory integrity methods (e.g. ECC/EDC) to be enabled or disabled. By default, all features are disabled. EDC is Error Detection and Correction. It can detect ECC errors and then do zero or more of the following: count SEC (single error corrected) and DED (double error detected, i.e. an uncorrected ECC error) events, halt the affected block, or interrupt the CPU. Currently, only counting errors is supported. Signed-off-by: David Panariti <David.Panariti at amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 ++++ drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 34 +++++++++++++++++++++++++++----- drivers/gpu/drm/amd/include/amd_shared.h | 14 +++++++++++++ 4 files changed, 48 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 4a16e3c..0322392 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -111,6 +111,7 @@ extern int amdgpu_prim_buf_per_se; extern int amdgpu_pos_buf_per_se; extern int amdgpu_cntl_sb_buf_per_se; extern int amdgpu_param_buf_per_se; +extern unsigned amdgpu_ecc_flags; #define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */ #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index ead00d7..00e16ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -110,6 +110,7 @@ int amdgpu_prim_buf_per_se = 0; int amdgpu_pos_buf_per_se = 0; int amdgpu_cntl_sb_buf_per_se = 0; int amdgpu_param_buf_per_se = 0; +unsigned amdgpu_ecc_flags = 0; MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes"); module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); @@ -235,6 +236,9 @@ module_param_named(cntl_sb_buf_per_se, amdgpu_cntl_sb_buf_per_se, int, 0444); MODULE_PARM_DESC(param_buf_per_se, "the size of Off-Chip Pramater Cache per Shader 
Engine (default depending on gfx)"); module_param_named(param_buf_per_se, amdgpu_param_buf_per_se, int, 0444); +MODULE_PARM_DESC(ecc_flags, "ECC/EDC enable flags (0 = disable ECC/EDC (default))"); +module_param_named(ecc_flags, amdgpu_ecc_flags, uint, 0444); + static const struct pci_device_id pciidlist[] = { #ifdef CONFIG_DRM_AMDGPU_SI diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 2f5bf5f..05cab7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1708,7 +1708,7 @@ static int gfx_v8_0_edc_clear_counters(struct amdgpu_device *adev) count = RREG32(cp->rnmap_addr); if (count != 0) { /* - * Workaround failed. + * EDC workaround failed. * If people are interested * in EDC at all, they will * want to know which @@ -1747,14 +1747,24 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) return 0; } - DRM_INFO("Detected Carrizo.\n"); + DRM_INFO("Detected Carrizo.\n"); tmp = RREG32(mmCC_GC_EDC_CONFIG); dis_bit = REG_GET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC); if (dis_bit) { - /* On Carrizo, EDC may be disabled by a fuse. */ - DRM_INFO("EDC hardware is disabled, GC_EDC_CONFIG: 0x%08x.\n", - tmp); + /* On Carrizo, EDC may be disabled permanently by a fuse. */ + DRM_INFO("Carrizo EDC hardware is disabled, GC_EDC_CONFIG: 0x%08x.\n", + tmp); + return 0; + } + + /* + * Check if EDC has been requested by a kernel parameter. + * For Carrizo, EDC is the best/safest mode WRT error handling. + */ + if (!(amdgpu_ecc_flags + & (AMD_ECC_SUPPORT_BEST | AMD_ECC_SUPPORT_EDC))) { + DRM_INFO("EDC support has not been requested.\n"); return 0; } @@ -1892,6 +1902,20 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) goto fail; } + /* 00 - GB_EDC_DED_MODE_LOG: Count DED errors but do not halt */ + tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 0); + /* Do not propagate the errors to the next block. 
*/ + tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 0); + WREG32(mmGB_EDC_MODE, tmp); + + tmp = RREG32(mmCC_GC_EDC_CONFIG); + + /* + * Clear EDC_DISABLE bit so the counters are available. + */ + tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0); + WREG32(mmCC_GC_EDC_CONFIG, tmp); + gfx_v8_0_edc_clear_counters(adev); fail: diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 2ccf44e..c4fd013 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -179,6 +179,20 @@ struct amd_pp_profile { #define AMD_PG_SUPPORT_GFX_QUICK_MG (1 << 11) #define AMD_PG_SUPPORT_GFX_PIPELINE (1 << 12) +/* + * ECC flags + * Allows the user to choose what kind of error detection/correction is used. + * Currently, EDC is supported on Carrizo. + * + * The AMD_ECC_SUPPORT_BEST bit is used to allow a user to have the driver + * set what it thinks is best/safest mode. This may not be the same as the + * default, depending on the GPU and the application. + * Using a single bit makes it easy to request the best support without + * needing to know all currently supported modes. + */ +#define AMD_ECC_SUPPORT_BEST (1 << 0) +#define AMD_ECC_SUPPORT_EDC (1 << 1) + enum amd_pm_state_type { /* not used for dpm */ POWER_STATE_TYPE_DEFAULT, -- 2.7.4 _______________________________________________ amd-gfx mailing list amd-gfx at lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx -------------- next part -------------- An HTML attachment was scrubbed... URL: <https://lists.freedesktop.org/archives/amd-gfx/attachments/20170501/b6c1018e/attachment-0001.html>