WREG32_RLC does not specify the correct XCC so the RLCG interface does not work. Define WREG32_RLC_XCC to be like WREG32_RLC but include a parameter to specify the XCC. v2: Add new macro WREG32_RLC_XCC instead of modifying exiting WREG32_RLC macro Original v1 title: "drm/amdgpu: Add xcc instance parameter to *REG32_RLC" Signed-off-by: Victor Lu <victorchengchi.lu@xxxxxxx> --- .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c | 41 +++++++++---------- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 34 +++++++-------- drivers/gpu/drm/amd/amdgpu/soc15_common.h | 8 +++- 3 files changed, 44 insertions(+), 39 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c index 490c8f5ddb60..084dbc41b579 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c @@ -300,14 +300,14 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd, hqd_end = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_AQL_DISPATCH_ID_HI); for (reg = hqd_base; reg <= hqd_end; reg++) - WREG32_RLC(reg, mqd_hqd[reg - hqd_base]); + WREG32_RLC_XCC(reg, mqd_hqd[reg - hqd_base], inst); /* Activate doorbell logic before triggering WPTR poll. */ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_DOORBELL_CONTROL), - data); + WREG32_RLC_XCC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_DOORBELL_CONTROL), + data, inst); if (wptr) { /* Don't read wptr with get_user because the user @@ -336,27 +336,26 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd, guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_LO), - lower_32_bits(guessed_wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_HI), - upper_32_bits(guessed_wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_POLL_ADDR), - lower_32_bits((uintptr_t)wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), + WREG32_RLC_XCC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_LO), + lower_32_bits(guessed_wptr), inst); + WREG32_RLC_XCC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_HI), + upper_32_bits(guessed_wptr), inst); + WREG32_RLC_XCC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_POLL_ADDR), + lower_32_bits((uintptr_t)wptr), inst); + WREG32_RLC_XCC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_POLL_ADDR_HI), - upper_32_bits((uintptr_t)wptr)); - WREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_PQ_WPTR_POLL_CNTL1), - (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, - queue_id)); + upper_32_bits((uintptr_t)wptr), inst); + WREG32_RLC_XCC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_PQ_WPTR_POLL_CNTL1), + (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, queue_id), inst); } /* Start the EOP fetcher */ - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_EOP_RPTR), + WREG32_RLC_XCC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_EOP_RPTR), REG_SET_FIELD(m->cp_hqd_eop_rptr, - CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); + CP_HQD_EOP_RPTR, INIT_FETCHER, 1), inst); data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_ACTIVE), data); + WREG32_RLC_XCC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_ACTIVE), data, inst); kgd_gfx_v9_release_queue(adev, inst); @@ -494,15 +493,15 @@ static uint32_t kgd_gfx_v9_4_3_set_address_watch( VALID, 1); - WREG32_RLC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst), + WREG32_RLC_XCC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regTCP_WATCH0_ADDR_H) + (watch_id * TCP_WATCH_STRIDE)), - watch_address_high); + watch_address_high, inst); - WREG32_RLC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst), + WREG32_RLC_XCC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regTCP_WATCH0_ADDR_L) + (watch_id * TCP_WATCH_STRIDE)), - watch_address_low); + watch_address_low, inst); return watch_address_cntl; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 51011e8ee90d..2a4f1a1b2196 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -91,8 +91,8 @@ void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmi { kgd_gfx_v9_lock_srbm(adev, 0, 0, 0, vmid, inst); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmSH_MEM_CONFIG), sh_mem_config); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmSH_MEM_BASES), sh_mem_bases); + WREG32_RLC_XCC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmSH_MEM_CONFIG), sh_mem_config, inst); + WREG32_RLC_XCC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmSH_MEM_BASES), sh_mem_bases, inst); /* APE1 no longer exists on GFX9 */ kgd_gfx_v9_unlock_srbm(adev, inst); @@ -239,14 +239,14 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd, for (reg = hqd_base; reg <= SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI); reg++) - WREG32_RLC(reg, mqd_hqd[reg - hqd_base]); + WREG32_RLC_XCC(reg, mqd_hqd[reg - hqd_base], inst); /* Activate doorbell logic before triggering WPTR poll. */ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_DOORBELL_CONTROL), - data); + WREG32_RLC_XCC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_DOORBELL_CONTROL), + data, inst); if (wptr) { /* Don't read wptr with get_user because the user @@ -275,25 +275,25 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd, guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_LO), - lower_32_bits(guessed_wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI), - upper_32_bits(guessed_wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR), - lower_32_bits((uintptr_t)wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), - upper_32_bits((uintptr_t)wptr)); + WREG32_RLC_XCC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_LO), + lower_32_bits(guessed_wptr), inst); + WREG32_RLC_XCC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI), + upper_32_bits(guessed_wptr), inst); + WREG32_RLC_XCC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR), + lower_32_bits((uintptr_t)wptr), inst); + WREG32_RLC_XCC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), + upper_32_bits((uintptr_t)wptr), inst); WREG32_SOC15(GC, GET_INST(GC, inst), mmCP_PQ_WPTR_POLL_CNTL1, (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, queue_id)); } /* Start the EOP fetcher */ - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_EOP_RPTR), + WREG32_RLC_XCC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_EOP_RPTR), REG_SET_FIELD(m->cp_hqd_eop_rptr, - CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); + CP_HQD_EOP_RPTR, INIT_FETCHER, 1), inst); data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE), data); + WREG32_RLC_XCC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE), data, inst); kgd_gfx_v9_release_queue(adev, inst); @@ -556,7 +556,7 @@ int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd, break; } - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_DEQUEUE_REQUEST), type); + WREG32_RLC_XCC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_DEQUEUE_REQUEST), type, inst); end_jiffies = (utimeout * HZ / 1000) + jiffies; while (true) { diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h index c75e9cd5c98b..19b233189e73 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h @@ -110,6 +110,9 @@ #define WREG32_RLC(reg, value) \ __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_RLC, GC_HWIP, 0) +#define WREG32_RLC_XCC(reg, value, inst) \ + __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_RLC, GC_HWIP, inst) + #define WREG32_RLC_EX(prefix, reg, value, inst) \ do { \ if (amdgpu_sriov_fullaccess(adev)) { \ @@ -140,7 +143,10 @@ /* for GC only */ #define RREG32_RLC(reg) \ - __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_RLC, GC_HWIP) + __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_RLC, GC_HWIP, 0) + +#define RREG32_RLC_XCC(reg, inst) \ + __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_RLC, GC_HWIP, inst) #define WREG32_RLC_NO_KIQ(reg, value, hwip) \ __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ | AMDGPU_REGS_RLC, hwip, 0) -- 2.34.1