On Tue, Feb 4, 2025 at 9:48 AM Christian König
<ckoenig.leichtzumerken@xxxxxxxxx> wrote:
>
> Apply the same changes to gfx7 as done to gfx9.
>
> Untested and probably needs some more work.
>
> Signed-off-by: Christian König <christian.koenig@xxxxxxx>
> ---
>  drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 89 ++++++++++++---------------
>  1 file changed, 39 insertions(+), 50 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> index 84745b2453ab..9f91c99725aa 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> @@ -3092,6 +3092,33 @@ static int gfx_v7_0_cp_resume(struct amdgpu_device *adev)
>  	return 0;
>  }
>
> +static void gfx_v7_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
> +				  int mem_space, int opt, uint32_t addr0,
> +				  uint32_t addr1, uint32_t ref, uint32_t mask,
> +				  uint32_t inv)
> +{
> +	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
> +	amdgpu_ring_write(ring,
> +			  /* memory (1) or register (0) */
> +			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
> +			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
> +			   WAIT_REG_MEM_FUNCTION(3) | /* equal */
> +			   WAIT_REG_MEM_ENGINE(eng_sel)));
> +
> +	WARN_ON(mem_space && addr0 & 0x3); /* Dword align */
> +	amdgpu_ring_write(ring, addr0);
> +	amdgpu_ring_write(ring, addr1);
> +	amdgpu_ring_write(ring, ref);
> +	amdgpu_ring_write(ring, mask);
> +	amdgpu_ring_write(ring, inv); /* poll interval */
> +}
> +
> +static void gfx_v7_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
> +					uint32_t val, uint32_t mask)
> +{
> +	gfx_v7_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
> +}
> +
>  /**
>   * gfx_v7_0_ring_emit_pipeline_sync - cik vm flush using the CP
>   *
> @@ -3106,6 +3133,11 @@ static void gfx_v7_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
>  	uint32_t seq = ring->fence_drv.sync_seq;
>  	uint64_t addr = ring->fence_drv.gpu_addr;
>
> +	gfx_v7_0_ring_emit_reg_wait(ring, mmCP_VMID_RESET, 0, 0xffff);
> +	amdgpu_ring_emit_wreg(ring, mmCP_VMID_RESET, 0);
> +	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
> +			       ring->fence_drv.sync_seq,
> +			       AMDGPU_FENCE_FLAG_EXEC);
>  	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
>  	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
>  				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
> @@ -4040,18 +4072,6 @@ static void gfx_v7_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
>  	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
>  }
>
> -static void gfx_v7_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
> -{
> -	struct amdgpu_device *adev = ring->adev;
> -	uint32_t value = 0;
> -
> -	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
> -	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
> -	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
> -	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
> -	WREG32(mmSQ_CMD, value);
> -}
> -
>  static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
>  {
>  	WREG32(mmSQ_IND_INDEX,
> @@ -4926,34 +4946,6 @@ static void gfx_v7_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
>  	amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
>  }
>
> -static void gfx_v7_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
> -				  int mem_space, int opt, uint32_t addr0,
> -				  uint32_t addr1, uint32_t ref, uint32_t mask,
> -				  uint32_t inv)
> -{
> -	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
> -	amdgpu_ring_write(ring,
> -			  /* memory (1) or register (0) */
> -			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
> -			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
> -			   WAIT_REG_MEM_FUNCTION(3) | /* equal */
> -			   WAIT_REG_MEM_ENGINE(eng_sel)));
> -
> -	if (mem_space)
> -		BUG_ON(addr0 & 0x3); /* Dword align */
> -	amdgpu_ring_write(ring, addr0);
> -	amdgpu_ring_write(ring, addr1);
> -	amdgpu_ring_write(ring, ref);
> -	amdgpu_ring_write(ring, mask);
> -	amdgpu_ring_write(ring, inv); /* poll interval */
> -}
> -
> -static void gfx_v7_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
> -					uint32_t val, uint32_t mask)
> -{
> -	gfx_v7_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
> -}
> -
>  static int gfx_v7_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
>  {
>  	struct amdgpu_device *adev = ring->adev;
> @@ -4986,14 +4978,13 @@ static int gfx_v7_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
>  	if (r)
>  		return r;
>
> -	if (amdgpu_ring_alloc(ring, 7 + 12 + 5))
> +	if (amdgpu_ring_alloc(ring, 7 + 12 + 5 + 7 + 4))
>  		return -ENOMEM;
> -	gfx_v7_0_ring_emit_fence_gfx(ring, ring->fence_drv.gpu_addr,
> -				     ring->fence_drv.sync_seq, AMDGPU_FENCE_FLAG_EXEC);
> -	gfx_v7_0_ring_emit_reg_wait(ring, mmCP_VMID_RESET, 0, 0xffff);
> -	gfx_v7_0_ring_emit_wreg(ring, mmCP_VMID_RESET, 0);
>
> -	return amdgpu_ring_test_ring(ring);
> +	gfx_v7_0_ring_emit_pipeline_sync(ring);
> +	amdgpu_ring_commit(ring);
> +
> +	return gfx_v7_0_ring_test_ib(ring, AMDGPU_QUEUE_RESET_TIMEOUT);
>  }
>
>  static const struct amd_ip_funcs gfx_v7_0_ip_funcs = {
> @@ -5026,7 +5017,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
>  		7 + /* gfx_v7_0_ring_emit_hdp_flush */
>  		5 + /* hdp invalidate */
>  		12 + 12 + 12 + /* gfx_v7_0_ring_emit_fence_gfx x3 for user fence, vm fence */
> -		7 + 4 + /* gfx_v7_0_ring_emit_pipeline_sync */
> +		7 + 12 + 5 + 7 + 4 + /* gfx_v7_0_ring_emit_pipeline_sync */
>  		CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /* gfx_v7_0_ring_emit_vm_flush */
>  		3 + 4 + /* gfx_v7_ring_emit_cntxcntl including vgt flush*/
>  		5, /* SURFACE_SYNC */
> @@ -5043,7 +5034,6 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
>  	.pad_ib = amdgpu_ring_generic_pad_ib,
>  	.emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
>  	.emit_wreg = gfx_v7_0_ring_emit_wreg,
> -	.soft_recovery = gfx_v7_0_ring_soft_recovery,
>  	.emit_mem_sync = gfx_v7_0_emit_mem_sync,
>  	.reset = gfx_v7_0_reset_kgq,
>  };
> @@ -5060,7 +5050,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
>  		20 + /* gfx_v7_0_ring_emit_gds_switch */
>  		7 + /* gfx_v7_0_ring_emit_hdp_flush */
>  		5 + /* hdp invalidate */
> -		7 + /* gfx_v7_0_ring_emit_pipeline_sync */
> +		7 + 12 + 5 + 7 + /* gfx_v7_0_ring_emit_pipeline_sync */
>  		CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v7_0_ring_emit_vm_flush */
>  		7 + 7 + 7 + /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */
>  		7, /* gfx_v7_0_emit_mem_sync_compute */
> @@ -5076,7 +5066,6 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
>  	.insert_nop = amdgpu_ring_insert_nop,
>  	.pad_ib = amdgpu_ring_generic_pad_ib,
>  	.emit_wreg = gfx_v7_0_ring_emit_wreg,
> -	.soft_recovery = gfx_v7_0_ring_soft_recovery,

Probably want to keep this for compute.

Alex

>  	.emit_mem_sync = gfx_v7_0_emit_mem_sync_compute,
>  };
>
> --
> 2.34.1
>
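A minimal sketch of what Alex's suggestion could look like, assuming it means
leaving the helper and the compute ring's .soft_recovery hook in place while
still dropping the callback from the gfx ring (the helper body below is taken
verbatim from the hunk the patch removes; the struct fragment only shows the
members quoted above):

static void gfx_v7_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t value = 0;

	/* Existing soft recovery: issue an SQ_CMD targeted at the hung VMID. */
	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
	WREG32(mmSQ_CMD, value);
}

static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
	/* ... other callbacks unchanged from the patch ... */
	.emit_wreg = gfx_v7_0_ring_emit_wreg,
	.soft_recovery = gfx_v7_0_ring_soft_recovery,	/* kept for compute */
	.emit_mem_sync = gfx_v7_0_emit_mem_sync_compute,
};

The rationale, presumably, is that the gfx ring now gets the CP_VMID_RESET
based queue reset path via gfx_v7_0_reset_kgq(), while the compute rings have
no such path here and would still benefit from the lighter SQ_CMD recovery.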