On Fri, May 12, 2023 at 3:49 AM YiPeng Chai <YiPeng.Chai@xxxxxxx> wrote:
>
> When testing sdma ib ring fails to detect sdma
> hang for sdma fed error, force to perform soft
> reset.
>
> Signed-off-by: YiPeng Chai <YiPeng.Chai@xxxxxxx>

Acked-by: Alex Deucher <alexander.deucher@xxxxxxx>

> ---
>  drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 16 ++++++++++++++++
>  1 file changed, 16 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
> index 3d9a80511a45..1d463e1fd3ae 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
> @@ -53,6 +53,12 @@ MODULE_FIRMWARE("amdgpu/sdma_6_0_3.bin");
>  #define SDMA0_HYP_DEC_REG_START 0x5880
>  #define SDMA0_HYP_DEC_REG_END 0x589a
>  #define SDMA1_HYP_DEC_REG_OFFSET 0x20
> +#define regRLC_RLCS_FED_STATUS_0 0x4eff
> +#define regRLC_RLCS_FED_STATUS_0_BASE_IDX 1
> +#define RLC_RLCS_FED_STATUS_0__SDMA0_FED_ERR_MASK 0x00000040L
> +#define RLC_RLCS_FED_STATUS_0__SDMA1_FED_ERR_MASK 0x00000080L
> +#define RLC_RLCS_FED_STATUS_0__SDMA0_FED_ERR__SHIFT 0x6
> +#define RLC_RLCS_FED_STATUS_0__SDMA1_FED_ERR__SHIFT 0x7
>
>  static void sdma_v6_0_set_ring_funcs(struct amdgpu_device *adev);
>  static void sdma_v6_0_set_buffer_funcs(struct amdgpu_device *adev);
> @@ -760,6 +766,7 @@ static bool sdma_v6_0_check_soft_reset(void *handle)
>  	struct amdgpu_ring *ring;
>  	int i, r;
>  	long tmo = msecs_to_jiffies(1000);
> +	uint32_t rlc_status0 = 0;
>
>  	for (i = 0; i < adev->sdma.num_instances; i++) {
>  		ring = &adev->sdma.instance[i].ring;
> @@ -768,6 +775,15 @@ static bool sdma_v6_0_check_soft_reset(void *handle)
>  			return true;
>  	}
>
> +	/* Soft reset should be performed for sdma fed error to
> +	 * recover sdma engine, so when testing sdma ib ring fails
> +	 * to detect sdma hang, force to perform soft reset.
> +	 */
> +	rlc_status0 = RREG32_SOC15(GC, 0, regRLC_RLCS_FED_STATUS_0);
> +	if (REG_GET_FIELD(rlc_status0, RLC_RLCS_FED_STATUS_0, SDMA0_FED_ERR) ||
> +	    REG_GET_FIELD(rlc_status0, RLC_RLCS_FED_STATUS_0, SDMA1_FED_ERR))
> +		return true;
> +
>  	return false;
>  }
>
> --
> 2.34.1
>