[AMD Official Use Only - AMD Internal Distribution Only] > -----Original Message----- > From: Zhu, Jiadong <Jiadong.Zhu@xxxxxxx> > Sent: Wednesday, October 9, 2024 5:23 AM > To: Lazar, Lijo <Lijo.Lazar@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx > Cc: Deucher, Alexander <Alexander.Deucher@xxxxxxx> > Subject: RE: [PATCH v3 3/3] drm/amdgpu/sdma4.4.2: implement ring reset callback > for sdma4.4.2 > > [AMD Official Use Only - AMD Internal Distribution Only] > > > -----Original Message----- > > From: Lazar, Lijo <Lijo.Lazar@xxxxxxx> > > Sent: Wednesday, October 9, 2024 5:13 PM > > To: Zhu, Jiadong <Jiadong.Zhu@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx > > Cc: Deucher, Alexander <Alexander.Deucher@xxxxxxx> > > Subject: Re: [PATCH v3 3/3] drm/amdgpu/sdma4.4.2: implement ring reset > > callback for sdma4.4.2 > > > > > > > > On 10/9/2024 12:52 PM, jiadong.zhu@xxxxxxx wrote: > > > From: Jiadong Zhu <Jiadong.Zhu@xxxxxxx> > > > > > > Implement sdma queue reset callback via SMU interface. > > > > > > v2: Leverage inst_stop/start functions in reset sequence. > > > Use GET_INST for physical SDMA instance. > > > Disable apu for sdma reset. > > > v3: Rephrase error prints. > > > > > > Signed-off-by: Jiadong Zhu <Jiadong.Zhu@xxxxxxx> > > > --- > > > drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 97 > > > +++++++++++++++++++----- > > > 1 file changed, 79 insertions(+), 18 deletions(-) > > > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c > > > b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c > > > index c77889040760..9a970a3cb908 100644 > > > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c > > > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c > > > @@ -667,11 +667,12 @@ static uint32_t sdma_v4_4_2_rb_cntl(struct > > amdgpu_ring *ring, uint32_t rb_cntl) > > > * > > > * @adev: amdgpu_device pointer > > > * @i: instance to resume > > > + * @restore: used to restore wptr when restart > > > * > > > * Set up the gfx DMA ring buffers and enable them. > > > * Returns 0 for success, error for failure. > > > */ > > > -static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, > > > unsigned int i) > > > +static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, > > > +unsigned int i, bool restore) > > > { > > > struct amdgpu_ring *ring = &adev->sdma.instance[i].ring; > > > u32 rb_cntl, ib_cntl, wptr_poll_cntl; @@ -698,16 +699,24 @@ > > > static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned > int i) > > > WREG32_SDMA(i, regSDMA_GFX_RB_BASE, ring->gpu_addr >> 8); > > > WREG32_SDMA(i, regSDMA_GFX_RB_BASE_HI, ring->gpu_addr >> 40); > > > > > > - ring->wptr = 0; > > > + if (!restore) > > > + ring->wptr = 0; > > > > > > /* before programing wptr to a less value, need set minor_ptr_update first */ > > > WREG32_SDMA(i, regSDMA_GFX_MINOR_PTR_UPDATE, 1); > > > > > > /* Initialize the ring buffer's read and write pointers */ > > > - WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, 0); > > > - WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, 0); > > > - WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, 0); > > > - WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, 0); > > > + if (restore) { > > > + WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, lower_32_bits(ring- > > >wptr << 2)); > > > + WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, > > upper_32_bits(ring->wptr << 2)); > > > + WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, lower_32_bits(ring- > > >wptr << 2)); > > > + WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, > > upper_32_bits(ring->wptr << 2)); > > > + } else { > > > + WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, 0); > > > + WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, 0); > > > + WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, 0); > > > + WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, 0); > > > + } > > > > > > doorbell = RREG32_SDMA(i, regSDMA_GFX_DOORBELL); > > > doorbell_offset = RREG32_SDMA(i, > > regSDMA_GFX_DOORBELL_OFFSET); @@ > > > -759,7 +768,7 @@ static void sdma_v4_4_2_gfx_resume(struct > > > amdgpu_device > > *adev, unsigned int i) > > > * Set up the page DMA ring buffers and enable them. > > > * Returns 0 for success, error for failure. > > > */ > > > -static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, > > > unsigned int i) > > > +static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, > > > +unsigned int i, bool restore) > > > { > > > struct amdgpu_ring *ring = &adev->sdma.instance[i].page; > > > u32 rb_cntl, ib_cntl, wptr_poll_cntl; @@ -775,10 +784,17 @@ > > > static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned > int i) > > > WREG32_SDMA(i, regSDMA_PAGE_RB_CNTL, rb_cntl); > > > > > > /* Initialize the ring buffer's read and write pointers */ > > > - WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR, 0); > > > - WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_HI, 0); > > > - WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR, 0); > > > - WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_HI, 0); > > > + if (restore) { > > > + WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, lower_32_bits(ring- > > >wptr << 2)); > > > + WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, > > upper_32_bits(ring->wptr << 2)); > > > + WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, lower_32_bits(ring- > > >wptr << 2)); > > > + WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, > > upper_32_bits(ring->wptr << 2)); > > > + } else { > > > + WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR, 0); > > > + WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_HI, 0); > > > + WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR, 0); > > > + WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_HI, 0); > > > + } > > > > > > /* set the wb address whether it's enabled or not */ > > > WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_ADDR_HI, @@ -792,7 > > +808,8 @@ > > > static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, > > > unsigned > > int i) > > > WREG32_SDMA(i, regSDMA_PAGE_RB_BASE, ring->gpu_addr >> 8); > > > WREG32_SDMA(i, regSDMA_PAGE_RB_BASE_HI, ring->gpu_addr >> > > 40); > > > > > > - ring->wptr = 0; > > > + if (!restore) > > > + ring->wptr = 0; > > > > > > /* before programing wptr to a less value, need set minor_ptr_update first */ > > > WREG32_SDMA(i, regSDMA_PAGE_MINOR_PTR_UPDATE, 1); @@ - > > 916,7 +933,7 > > > @@ static int sdma_v4_4_2_inst_load_microcode(struct amdgpu_device *adev, > > > * Returns 0 for success, error for failure. > > > */ > > > static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev, > > > - uint32_t inst_mask) > > > + uint32_t inst_mask, bool restore) > > > { > > > struct amdgpu_ring *ring; > > > uint32_t tmp_mask; > > > @@ -927,7 +944,7 @@ static int sdma_v4_4_2_inst_start(struct > > > amdgpu_device > > *adev, > > > sdma_v4_4_2_inst_enable(adev, false, inst_mask); > > > } else { > > > /* bypass sdma microcode loading on Gopher */ > > > - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP && > > > + if (!restore && adev->firmware.load_type != > > AMDGPU_FW_LOAD_PSP && > > > adev->sdma.instance[0].fw) { > > > r = sdma_v4_4_2_inst_load_microcode(adev, inst_mask); > > > if (r) > > > @@ -946,9 +963,9 @@ static int sdma_v4_4_2_inst_start(struct > > > amdgpu_device > > *adev, > > > uint32_t temp; > > > > > > WREG32_SDMA(i, regSDMA_SEM_WAIT_FAIL_TIMER_CNTL, 0); > > > - sdma_v4_4_2_gfx_resume(adev, i); > > > + sdma_v4_4_2_gfx_resume(adev, i, restore); > > > if (adev->sdma.has_page_queue) > > > - sdma_v4_4_2_page_resume(adev, i); > > > + sdma_v4_4_2_page_resume(adev, i, restore); > > > > > > /* set utc l1 enable flag always to 1 */ > > > temp = RREG32_SDMA(i, regSDMA_CNTL); @@ -1477,7 +1494,7 > > @@ static > > > int sdma_v4_4_2_hw_init(void *handle) > > > if (!amdgpu_sriov_vf(adev)) > > > sdma_v4_4_2_inst_init_golden_registers(adev, inst_mask); > > > > > > - r = sdma_v4_4_2_inst_start(adev, inst_mask); > > > + r = sdma_v4_4_2_inst_start(adev, inst_mask, false); > > > > > > return r; > > > } > > > @@ -1566,6 +1583,49 @@ static int sdma_v4_4_2_soft_reset(void *handle) > > > return 0; > > > } > > > > > > +static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, > > > +unsigned int vmid) { > > > + struct amdgpu_device *adev = ring->adev; > > > + int i, r; > > > + u32 preempt, inst_mask; > > > + > > > + if ((adev->flags & AMD_IS_APU) || amdgpu_sriov_vf(adev)) > > > + return -EINVAL; > > > + > > > + /* stop queue */ > > > + inst_mask = 1 << ring->me; > > > + sdma_v4_4_2_inst_gfx_stop(adev, inst_mask); > > > + if (adev->sdma.has_page_queue) > > > + sdma_v4_4_2_inst_page_stop(adev, inst_mask); > > > + > > > + /* soft reset SDMA_GFX_PREEMPT.IB_PREEMPT = 0*/ > > > + preempt = RREG32_SDMA(ring->me, regSDMA_GFX_PREEMPT); > > > + preempt = REG_SET_FIELD(preempt, SDMA_GFX_PREEMPT, > > IB_PREEMPT, 0); > > > + WREG32_SDMA(ring->me, regSDMA_GFX_PREEMPT, preempt); > > > > Confirming - should this be RB_PREEMPT? > > > + > > > + r = amdgpu_dpm_reset_sdma(adev, 1 << GET_INST(SDMA0, ring->me)); > > > + if (r) { > > > + dev_err(adev->dev, "SDMA%d reset failed\n", ring->me); > > > > My earlier comment was if smu_v13_0_6_reset_sdma() fails, that will > > result in this print in addition to the error log in > > smu_v13_0_6_reset_sdma(). You may drop one of them. > > Sure. > > Thanks, > Jiadong > > > > > Apart from that missed to ask one basic question - this looks more > > like an SDMA engine reset rather than a SDMA queue reset (ex: only > > reset one of the SDMA RLC queues). Is this intended as a queue reset? > > > > Thanks, > > Lijo > > Yes, OSS4.X does not support to reset a queue for sdma, thus we implement it by > per engine reset. SDMA is single threaded so it is effectively a queue reset even if the entire instance is reset since there will only be one job active on the instance at a given time. Alex > > Thanks, > Jiadong > > > > + return r; > > > + } > > > + > > > + udelay(50); > > > + > > > + for (i = 0; i < adev->usec_timeout; i++) { > > > + if (!REG_GET_FIELD(RREG32_SDMA(ring->me, > > regSDMA_F32_CNTL), SDMA_F32_CNTL, HALT)) > > > + break; > > > + udelay(1); > > > + } > > > + > > > + if (i == adev->usec_timeout) { > > > + dev_err(adev->dev, "timed out waiting for SDMA%d unhalt > > > + after > > reset\n", > > > + ring->me); > > > + return -ETIMEDOUT; > > > + } > > > + > > > + return sdma_v4_4_2_inst_start(adev, inst_mask, true); } > > > + > > > static int sdma_v4_4_2_set_trap_irq_state(struct amdgpu_device *adev, > > > struct amdgpu_irq_src *source, > > > unsigned type, @@ -1948,6 > > > +2008,7 @@ static const struct amdgpu_ring_funcs > > sdma_v4_4_2_ring_funcs = { > > > .emit_wreg = sdma_v4_4_2_ring_emit_wreg, > > > .emit_reg_wait = sdma_v4_4_2_ring_emit_reg_wait, > > > .emit_reg_write_reg_wait = > > > amdgpu_ring_emit_reg_write_reg_wait_helper, > > > + .reset = sdma_v4_4_2_reset_queue, > > > }; > > > > > > static const struct amdgpu_ring_funcs sdma_v4_4_2_page_ring_funcs = > > > { @@ -2160,7 +2221,7 @@ static int sdma_v4_4_2_xcp_resume(void > > > *handle, > > uint32_t inst_mask) > > > if (!amdgpu_sriov_vf(adev)) > > > sdma_v4_4_2_inst_init_golden_registers(adev, inst_mask); > > > > > > - r = sdma_v4_4_2_inst_start(adev, inst_mask); > > > + r = sdma_v4_4_2_inst_start(adev, inst_mask, false); > > > > > > return r; > > > }