Add firmware write/read point reset sync through shared memory Signed-off-by: James Zhu <James.Zhu@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 42 ++++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 28ef7df..852795c 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -92,6 +92,7 @@ static int vcn_v2_0_sw_init(void *handle) struct amdgpu_ring *ring; int i, r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + volatile struct amdgpu_fw_shared *fw_shared; /* VCN DEC TRAP */ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, @@ -174,6 +175,8 @@ static int vcn_v2_0_sw_init(void *handle) if (r) return r; + fw_shared = adev->vcn.inst->fw_shared_cpu_addr; + fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_VCN_MULTI_QUEUE_FLAG); return 0; } @@ -188,6 +191,9 @@ static int vcn_v2_0_sw_fini(void *handle) { int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared_cpu_addr; + + fw_shared->present_flag_0 = 0; amdgpu_virt_free_mm_table(adev); @@ -363,6 +369,15 @@ static void vcn_v2_0_mc_resume(struct amdgpu_device *adev) WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0); WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); + /* non-cache window */ + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst->fw_shared_gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst->fw_shared_gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_VCPU_NONCACHE_OFFSET0, 0); + WREG32_SOC15(UVD, 0, mmUVD_VCPU_NONCACHE_SIZE0, + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared))); + WREG32_SOC15(UVD, 0, mmUVD_GFX10_ADDR_CONFIG, adev->gfx.config.gb_addr_config); } @@ -446,13 +461,16 @@ static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirec /* non-cache window */ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), 0, 0, indirect); + UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst->fw_shared_gpu_addr), 0, indirect); WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), 0, 0, indirect); + UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst->fw_shared_gpu_addr), 0, indirect); WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_VCPU_NONCACHE_SIZE0), 0, 0, indirect); + UVD, 0, mmUVD_VCPU_NONCACHE_SIZE0), + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)), 0, indirect); /* VCN global tiling registers */ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( @@ -777,6 +795,7 @@ static void vcn_v2_0_enable_static_power_gating(struct amdgpu_device *adev) static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) { + volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared_cpu_addr; struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; uint32_t rb_bufsz, tmp; @@ -880,6 +899,8 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); + fw_shared->multi_queue.decode_queue_mode |= fw_queue_ring_reset; + /* set the write pointer delay */ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL, 0); @@ -902,6 +923,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); + fw_shared->multi_queue.decode_queue_mode &= ~fw_queue_ring_reset; /* Unstall DPG */ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); @@ -910,6 +932,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) static int vcn_v2_0_start(struct amdgpu_device *adev) { + volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared_cpu_addr; struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; uint32_t rb_bufsz, tmp; uint32_t lmi_swap_cntl; @@ -1044,6 +1067,7 @@ static int vcn_v2_0_start(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp); + fw_shared->multi_queue.decode_queue_mode |= fw_queue_ring_reset; /* programm the RB_BASE for ring buffer */ WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, lower_32_bits(ring->gpu_addr)); @@ -1056,20 +1080,25 @@ static int vcn_v2_0_start(struct amdgpu_device *adev) ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR); WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); + fw_shared->multi_queue.decode_queue_mode &= ~fw_queue_ring_reset; + fw_shared->multi_queue.encode_generalpurpose_queue_mode |= fw_queue_ring_reset; ring = &adev->vcn.inst->ring_enc[0]; WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); + fw_shared->multi_queue.encode_generalpurpose_queue_mode &= ~fw_queue_ring_reset; + fw_shared->multi_queue.encode_lowlatency_queue_mode |= fw_queue_ring_reset; ring = &adev->vcn.inst->ring_enc[1]; WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); + fw_shared->multi_queue.encode_lowlatency_queue_mode &= ~fw_queue_ring_reset; return 0; } @@ -1191,6 +1220,7 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); if (!ret_code) { + volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared_cpu_addr; /* pause DPG */ reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); @@ -1205,6 +1235,7 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev, UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); /* Restore */ + fw_shared->multi_queue.encode_generalpurpose_queue_mode |= fw_queue_ring_reset; ring = &adev->vcn.inst->ring_enc[0]; ring->wptr = 0; WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); @@ -1212,7 +1243,9 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev, WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + fw_shared->multi_queue.encode_generalpurpose_queue_mode &= ~fw_queue_ring_reset; + fw_shared->multi_queue.encode_lowlatency_queue_mode |= fw_queue_ring_reset; ring = &adev->vcn.inst->ring_enc[1]; ring->wptr = 0; WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); @@ -1220,9 +1253,12 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev, WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + fw_shared->multi_queue.encode_lowlatency_queue_mode &= ~fw_queue_ring_reset; + fw_shared->multi_queue.decode_queue_mode |= fw_queue_ring_reset; WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF); + fw_shared->multi_queue.decode_queue_mode &= ~fw_queue_ring_reset; /* Unstall DPG */ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); -- 2.7.4 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx