On Fri, Sep 20, 2024 at 2:43 AM <jiadong.zhu@xxxxxxx> wrote: > > From: Jiadong Zhu <Jiadong.Zhu@xxxxxxx> > > Extract the resume sequence from sdma_v5_0_gfx_resume for > starting/restarting an individual instance. > > Signed-off-by: Jiadong Zhu <Jiadong.Zhu@xxxxxxx> Acked-by: Alex Deucher <alexander.deucher@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 253 ++++++++++++++----------- > 1 file changed, 138 insertions(+), 115 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c > index 3e48ea38385d..e813da1e48aa 100644 > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c > @@ -705,14 +705,16 @@ static void sdma_v5_0_enable(struct amdgpu_device *adev, bool enable) > } > > /** > - * sdma_v5_0_gfx_resume - setup and start the async dma engines > + * sdma_v5_0_gfx_resume_instance - start/restart a certain sdma engine > * > * @adev: amdgpu_device pointer > + * @i: instance > + * @restore: used to restore wptr when restart > * > - * Set up the gfx DMA ring buffers and enable them (NAVI10). > - * Returns 0 for success, error for failure. > + * Set up the gfx DMA ring buffers and enable them. On restart, we will restore wptr and rptr. > + * Return 0 for success. > */ > -static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev) > +static int sdma_v5_0_gfx_resume_instance(struct amdgpu_device *adev, int i, bool restore) > { > struct amdgpu_ring *ring; > u32 rb_cntl, ib_cntl; > @@ -722,142 +724,163 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev) > u32 temp; > u32 wptr_poll_cntl; > u64 wptr_gpu_addr; > - int i, r; > > - for (i = 0; i < adev->sdma.num_instances; i++) { > - ring = &adev->sdma.instance[i].ring; > + ring = &adev->sdma.instance[i].ring; > > - if (!amdgpu_sriov_vf(adev)) > - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0); > + if (!amdgpu_sriov_vf(adev)) > + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0); > > - /* Set ring buffer size in dwords */ > - rb_bufsz = order_base_2(ring->ring_size / 4); > - rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); > - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz); > + /* Set ring buffer size in dwords */ > + rb_bufsz = order_base_2(ring->ring_size / 4); > + rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); > + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz); > #ifdef __BIG_ENDIAN > - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1); > - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, > - RPTR_WRITEBACK_SWAP_ENABLE, 1); > + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1); > + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, > + RPTR_WRITEBACK_SWAP_ENABLE, 1); > #endif > - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); > - > - /* Initialize the ring buffer's read and write pointers */ > + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); > + > + /* Initialize the ring buffer's read and write pointers */ > + if (restore) { > + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), lower_32_bits(ring->wptr << 2)); > + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), upper_32_bits(ring->wptr << 2)); > + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2)); > + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2)); > + } else { > WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0); > WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0); > WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0); > WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0); > - > - /* setup the wptr shadow polling */ > - wptr_gpu_addr = ring->wptr_gpu_addr; > - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO), > - lower_32_bits(wptr_gpu_addr)); > - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI), > - upper_32_bits(wptr_gpu_addr)); > - wptr_poll_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, > - mmSDMA0_GFX_RB_WPTR_POLL_CNTL)); > - wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, > - SDMA0_GFX_RB_WPTR_POLL_CNTL, > - F32_POLL_ENABLE, 1); > - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), > - wptr_poll_cntl); > - > - /* set the wb address whether it's enabled or not */ > - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI), > - upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF); > - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO), > - lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC); > - > - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); > - > - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), > - ring->gpu_addr >> 8); > - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), > - ring->gpu_addr >> 40); > - > + } > + /* setup the wptr shadow polling */ > + wptr_gpu_addr = ring->wptr_gpu_addr; > + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO), > + lower_32_bits(wptr_gpu_addr)); > + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI), > + upper_32_bits(wptr_gpu_addr)); > + wptr_poll_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, > + mmSDMA0_GFX_RB_WPTR_POLL_CNTL)); > + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, > + SDMA0_GFX_RB_WPTR_POLL_CNTL, > + F32_POLL_ENABLE, 1); > + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), > + wptr_poll_cntl); > + > + /* set the wb address whether it's enabled or not */ > + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI), > + upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF); > + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO), > + lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC); > + > + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); > + > + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), > + ring->gpu_addr >> 8); > + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), > + ring->gpu_addr >> 40); > + > + if (!restore) > ring->wptr = 0; > > - /* before programing wptr to a less value, need set minor_ptr_update first */ > - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1); > + /* before programing wptr to a less value, need set minor_ptr_update first */ > + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1); > > - if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */ > - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), > - lower_32_bits(ring->wptr << 2)); > - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), > - upper_32_bits(ring->wptr << 2)); > - } > + if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */ > + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), > + lower_32_bits(ring->wptr << 2)); > + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), > + upper_32_bits(ring->wptr << 2)); > + } > > - doorbell = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL)); > - doorbell_offset = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, > - mmSDMA0_GFX_DOORBELL_OFFSET)); > + doorbell = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL)); > + doorbell_offset = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, > + mmSDMA0_GFX_DOORBELL_OFFSET)); > > - if (ring->use_doorbell) { > - doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1); > - doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET, > - OFFSET, ring->doorbell_index); > - } else { > - doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0); > - } > - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell); > - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), > - doorbell_offset); > + if (ring->use_doorbell) { > + doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1); > + doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET, > + OFFSET, ring->doorbell_index); > + } else { > + doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0); > + } > + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell); > + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), > + doorbell_offset); > > - adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, > - ring->doorbell_index, 20); > + adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, > + ring->doorbell_index, 20); > > - if (amdgpu_sriov_vf(adev)) > - sdma_v5_0_ring_set_wptr(ring); > + if (amdgpu_sriov_vf(adev)) > + sdma_v5_0_ring_set_wptr(ring); > > - /* set minor_ptr_update to 0 after wptr programed */ > - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0); > + /* set minor_ptr_update to 0 after wptr programed */ > + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0); > > - if (!amdgpu_sriov_vf(adev)) { > - /* set utc l1 enable flag always to 1 */ > - temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL)); > - temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1); > - > - /* enable MCBP */ > - temp = REG_SET_FIELD(temp, SDMA0_CNTL, MIDCMD_PREEMPT_ENABLE, 1); > - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp); > - > - /* Set up RESP_MODE to non-copy addresses */ > - temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL)); > - temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3); > - temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9); > - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL), temp); > - > - /* program default cache read and write policy */ > - temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE)); > - /* clean read policy and write policy bits */ > - temp &= 0xFF0FFF; > - temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) | (CACHE_WRITE_POLICY_L2__DEFAULT << 14)); > - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp); > - } > + if (!amdgpu_sriov_vf(adev)) { > + /* set utc l1 enable flag always to 1 */ > + temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL)); > + temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1); > + > + /* enable MCBP */ > + temp = REG_SET_FIELD(temp, SDMA0_CNTL, MIDCMD_PREEMPT_ENABLE, 1); > + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp); > + > + /* Set up RESP_MODE to non-copy addresses */ > + temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL)); > + temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3); > + temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9); > + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL), temp); > + > + /* program default cache read and write policy */ > + temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE)); > + /* clean read policy and write policy bits */ > + temp &= 0xFF0FFF; > + temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) | (CACHE_WRITE_POLICY_L2__DEFAULT << 14)); > + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp); > + } > > - if (!amdgpu_sriov_vf(adev)) { > - /* unhalt engine */ > - temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL)); > - temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0); > - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp); > - } > + if (!amdgpu_sriov_vf(adev)) { > + /* unhalt engine */ > + temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL)); > + temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0); > + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp); > + } > > - /* enable DMA RB */ > - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1); > - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); > + /* enable DMA RB */ > + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1); > + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); > > - ib_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); > - ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1); > + ib_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); > + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1); > #ifdef __BIG_ENDIAN > - ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1); > + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1); > #endif > - /* enable DMA IBs */ > - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); > + /* enable DMA IBs */ > + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); > > - if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */ > - sdma_v5_0_ctx_switch_enable(adev, true); > - sdma_v5_0_enable(adev, true); > - } > + if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */ > + sdma_v5_0_ctx_switch_enable(adev, true); > + sdma_v5_0_enable(adev, true); > + } > + > + return amdgpu_ring_test_helper(ring); > +} > > - r = amdgpu_ring_test_helper(ring); > +/** > + * sdma_v5_0_gfx_resume - setup and start the async dma engines > + * > + * @adev: amdgpu_device pointer > + * > + * Set up the gfx DMA ring buffers and enable them (NAVI10). > + * Returns 0 for success, error for failure. > + */ > +static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev) > +{ > + int i, r; > + > + for (i = 0; i < adev->sdma.num_instances; i++) { > + r = sdma_v5_0_gfx_resume_instance(adev, i, false); > if (r) > return r; > } > -- > 2.25.1 >