bad_page_threshold could be specified to detect and retire bad GPU if faulty bad pages exceed it. When it's -1, ras will use typical bad page failure value. When it's 0, disable bad gpu check. v2: correct documentation of this parameter. Signed-off-by: Guchun Chen <guchun.chen@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 06bfb8658dec..bb83ffb5e26a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -181,6 +181,7 @@ extern uint amdgpu_dm_abm_level; extern struct amdgpu_mgpu_info mgpu_info; extern int amdgpu_ras_enable; extern uint amdgpu_ras_mask; +extern int amdgpu_bad_page_threshold; extern int amdgpu_async_gfx_ring; extern int amdgpu_mcbp; extern int amdgpu_discovery; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index d28b95f721c4..5a517f2fce35 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -161,6 +161,7 @@ struct amdgpu_mgpu_info mgpu_info = { }; int amdgpu_ras_enable = -1; uint amdgpu_ras_mask = 0xffffffff; +int amdgpu_bad_page_threshold = -1; /** * DOC: vramlimit (int) @@ -801,6 +802,16 @@ module_param_named(tmz, amdgpu_tmz, int, 0444); MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)"); module_param_named(reset_method, amdgpu_reset_method, int, 0444); +/** + * DOC: bad_page_threshold (int) + * Bad page threshold configuration is applied by GPU retirement policy. + * The detail is to specify the threshold value of faulty pages detected by + * ECC, that may result in GPU's retirement if total faulty pages by ECC + * exceed threshold value. + */ +MODULE_PARM_DESC(bad_page_threshold, "Bad page threshold(-1 = auto(default typical value), 0 = disable)"); +module_param_named(bad_page_threshold, amdgpu_bad_page_threshold, int, 0444); + static const struct pci_device_id pciidlist[] = { #ifdef CONFIG_DRM_AMDGPU_SI {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, -- 2.17.1 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx