When testing sdma ib ring fails to detect sdma hang for sdma fed error, force to perform soft reset. V2: Add poison mode support check for special code path. Signed-off-by: YiPeng Chai <YiPeng.Chai@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 3d9a80511a45..94b1364d743e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -53,6 +53,12 @@ MODULE_FIRMWARE("amdgpu/sdma_6_0_3.bin"); #define SDMA0_HYP_DEC_REG_START 0x5880 #define SDMA0_HYP_DEC_REG_END 0x589a #define SDMA1_HYP_DEC_REG_OFFSET 0x20 +#define regRLC_RLCS_FED_STATUS_0 0x4eff +#define regRLC_RLCS_FED_STATUS_0_BASE_IDX 1 +#define RLC_RLCS_FED_STATUS_0__SDMA0_FED_ERR_MASK 0x00000040L +#define RLC_RLCS_FED_STATUS_0__SDMA1_FED_ERR_MASK 0x00000080L +#define RLC_RLCS_FED_STATUS_0__SDMA0_FED_ERR__SHIFT 0x6 +#define RLC_RLCS_FED_STATUS_0__SDMA1_FED_ERR__SHIFT 0x7 static void sdma_v6_0_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v6_0_set_buffer_funcs(struct amdgpu_device *adev); @@ -768,6 +774,19 @@ static bool sdma_v6_0_check_soft_reset(void *handle) return true; } + /* Soft reset should be performed for sdma fed error to + * recover sdma engine, so when testing sdma ib ring fails + * to detect sdma hang, force to perform soft reset. + */ + if (amdgpu_ras_is_poison_mode_supported(adev)) { + uint32_t rlc_status0 = 0; + + rlc_status0 = RREG32_SOC15(GC, 0, regRLC_RLCS_FED_STATUS_0); + if (REG_GET_FIELD(rlc_status0, RLC_RLCS_FED_STATUS_0, SDMA0_FED_ERR) || + REG_GET_FIELD(rlc_status0, RLC_RLCS_FED_STATUS_0, SDMA1_FED_ERR)) + return true; + } + return false; } -- 2.34.1