[AMD Official Use Only - AMD Internal Distribution Only] > -----Original Message----- > From: Alex Deucher <alexdeucher@xxxxxxxxx> > Sent: Tuesday, September 24, 2024 9:41 PM > To: Zhu, Jiadong <Jiadong.Zhu@xxxxxxx> > Cc: amd-gfx@xxxxxxxxxxxxxxxxxxxxx; Deucher, Alexander > <Alexander.Deucher@xxxxxxx> > Subject: Re: [PATCH 4/4] drm/amdgpu/sdma5.2: implement ring reset callback for > sdma5.2 > > On Fri, Sep 20, 2024 at 2:31 AM <jiadong.zhu@xxxxxxx> wrote: > > > > From: Jiadong Zhu <Jiadong.Zhu@xxxxxxx> > > > > Implement sdma queue reset callback via MMIO. > > > > Signed-off-by: Jiadong Zhu <Jiadong.Zhu@xxxxxxx> > > --- > > drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 86 > > +++++++++++++++++++++++++- > > 1 file changed, 85 insertions(+), 1 deletion(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c > > b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c > > index 21457093eae9..3c93a8954a09 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c > > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c > > @@ -597,7 +597,7 @@ static int sdma_v5_2_gfx_resume_instance(struct > amdgpu_device *adev, int i, bool > > WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, > mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8); > > WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, > > mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40); > > > > - if (restore) > > + if (!restore) > > Reversed logic here. Intended? If so, it should probably be in the previous patch. 
> I will fix it in v2 Jiadong > > ring->wptr = 0; > > > > /* before programing wptr to a less value, need set > > minor_ptr_update first */ @@ -1424,6 +1424,89 @@ static int > sdma_v5_2_wait_for_idle(void *handle) > > return -ETIMEDOUT; > > } > > > > +static int sdma_v5_2_reset_queue(struct amdgpu_ring *ring, unsigned > > +int vmid) { > > + struct amdgpu_device *adev = ring->adev; > > + int i, j; > > + u32 rb_cntl, ib_cntl, f32_cntl, freeze, cntl, preempt, > > +soft_reset, stat1_reg; > > + > > + if (amdgpu_sriov_vf(adev)) > > + return -EINVAL; > > + > > + for (i = 0; i < adev->sdma.num_instances; i++) { > > + if (ring == &adev->sdma.instance[i].ring) > > + break; > > + } > > + > > + if (i == adev->sdma.num_instances) { > > + DRM_ERROR("sdma instance not found\n"); > > + return -EINVAL; > > + } > > + > > + /* stop queue */ > > + ib_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, > mmSDMA0_GFX_IB_CNTL)); > > + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, > 0); > > + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), > > + ib_cntl); > > + > > + rb_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, > mmSDMA0_GFX_RB_CNTL)); > > + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, > RB_ENABLE, 0); > > + WREG32(sdma_v5_2_get_reg_offset(adev, i, > mmSDMA0_GFX_RB_CNTL), > > + rb_cntl); > > + > > + /*engine stop SDMA1_F32_CNTL.HALT to 1 and SDMAx_FREEZE freeze > bit to 1 */ > > + freeze = RREG32(sdma_v5_2_get_reg_offset(adev, i, > mmSDMA0_FREEZE)); > > + freeze = REG_SET_FIELD(freeze, SDMA0_FREEZE, FREEZE, 1); > > + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_FREEZE), > > + freeze); > > + > > + for (j = 0; j < adev->usec_timeout; j++) { > > + freeze = RREG32(sdma_v5_2_get_reg_offset(adev, i, > > + mmSDMA0_FREEZE)); > > + > > + if (REG_GET_FIELD(freeze, SDMA0_FREEZE, FROZEN) & 1) > > + break; > > + udelay(1); > > + } > > + > > + > > + if (j == adev->usec_timeout) { > > + stat1_reg = RREG32(sdma_v5_2_get_reg_offset(adev, i, > 
mmSDMA0_STATUS1_REG)); > > + if ((stat1_reg & 0x3FF) != 0x3FF) { > > + DRM_ERROR("cannot soft reset as sdma not idle\n"); > > + return -ETIMEDOUT; > > + } > > + } > > + > > + f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, > mmSDMA0_F32_CNTL)); > > + f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 1); > > + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), > > + f32_cntl); > > + > > + cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL)); > > + cntl = REG_SET_FIELD(cntl, SDMA0_CNTL, UTC_L1_ENABLE, 0); > > + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), cntl); > > + > > + /* soft reset SDMA_GFX_PREEMPT.IB_PREEMPT = 0 > mmGRBM_SOFT_RESET.SOFT_RESET_SDMA0/1 = 1 */ > > + preempt = RREG32(sdma_v5_2_get_reg_offset(adev, i, > mmSDMA0_GFX_PREEMPT)); > > + preempt = REG_SET_FIELD(preempt, SDMA0_GFX_PREEMPT, > IB_PREEMPT, 0); > > + WREG32(sdma_v5_2_get_reg_offset(adev, i, > mmSDMA0_GFX_PREEMPT), > > + preempt); > > + > > + soft_reset = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); > > + soft_reset |= 1 << > GRBM_SOFT_RESET__SOFT_RESET_SDMA0__SHIFT << > > + i; > > + > > + > > + WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, soft_reset); > > + > > + udelay(50); > > + > > + soft_reset &= ~(1 << > GRBM_SOFT_RESET__SOFT_RESET_SDMA0__SHIFT > > + << i); > > + > > + WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, soft_reset); > > + > > + /* unfreeze and unhalt */ > > + freeze = RREG32(sdma_v5_2_get_reg_offset(adev, i, > mmSDMA0_FREEZE)); > > + freeze = REG_SET_FIELD(freeze, SDMA0_FREEZE, FREEZE, 0); > > + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_FREEZE), > > + freeze); > > > SDMA 5.2 has SDMAx_QUEUE_RESET_REQ similar to SDMA 6.x. Any reason > to use soft reset rather than queue reset? > > Alex > I changed to use SDMAx_QUEUE_RESET_REQ on nv21; in some cases the reset does not succeed (the firmware stalls). Synced with the SDMA hardware design team: they said QUEUE_RESET_REQ is a prototype on OSS 5.2 and some of its functionality is not committed. 
Jiadong > > + > > + return sdma_v5_2_gfx_resume_instance(adev, i, true); } > > + > > static int sdma_v5_2_ring_preempt_ib(struct amdgpu_ring *ring) { > > int i, r = 0; > > @@ -1859,6 +1942,7 @@ static const struct amdgpu_ring_funcs > sdma_v5_2_ring_funcs = { > > .emit_reg_write_reg_wait = sdma_v5_2_ring_emit_reg_write_reg_wait, > > .init_cond_exec = sdma_v5_2_ring_init_cond_exec, > > .preempt_ib = sdma_v5_2_ring_preempt_ib, > > + .reset = sdma_v5_2_reset_queue, > > }; > > > > static void sdma_v5_2_set_ring_funcs(struct amdgpu_device *adev) > > -- > > 2.25.1 > >