Add firmware write/read point reset sync through shared memory Signed-off-by: James Zhu <James.Zhu@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 51 ++++++++++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 7eb3b9d..6aef04f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -165,6 +165,8 @@ static int vcn_v2_5_sw_init(void *handle) return r; for (j = 0; j < adev->vcn.num_vcn_inst; j++) { + volatile struct amdgpu_fw_shared *fw_shared; + if (adev->vcn.harvest_config & (1 << j)) continue; adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET; @@ -207,6 +209,9 @@ static int vcn_v2_5_sw_init(void *handle) if (r) return r; } + + fw_shared = adev->vcn.inst[j].fw_shared_cpu_addr; + fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_VCN_MULTI_QUEUE_FLAG); } if (amdgpu_sriov_vf(adev)) { @@ -230,8 +235,16 @@ static int vcn_v2_5_sw_init(void *handle) */ static int vcn_v2_5_sw_fini(void *handle) { - int r; + int i, r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + volatile struct amdgpu_fw_shared *fw_shared; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr; + fw_shared->present_flag_0 = 0; + } if (amdgpu_sriov_vf(adev)) amdgpu_virt_free_mm_table(adev); @@ -424,6 +437,15 @@ static void vcn_v2_5_mc_resume(struct amdgpu_device *adev) upper_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET2, 0); WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); + + /* non-cache window */ + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[i].fw_shared_gpu_addr)); + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[i].fw_shared_gpu_addr)); + WREG32_SOC15(UVD, i, mmUVD_VCPU_NONCACHE_OFFSET0, 0); + WREG32_SOC15(UVD, i, mmUVD_VCPU_NONCACHE_SIZE0, + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared))); } } @@ -507,13 +529,16 @@ static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx /* non-cache window */ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), 0, 0, indirect); + UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), 0, 0, indirect); + UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, 0, mmUVD_VCPU_NONCACHE_SIZE0), 0, 0, indirect); + UVD, 0, mmUVD_VCPU_NONCACHE_SIZE0), + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)), 0, indirect); /* VCN global tiling registers */ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( @@ -750,6 +775,7 @@ static void vcn_v2_5_enable_clock_gating(struct amdgpu_device *adev) static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) { + volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr; struct amdgpu_ring *ring; uint32_t rb_bufsz, tmp; @@ -883,6 +909,7 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); + fw_shared->multi_queue.decode_queue_mode &= ~fw_queue_ring_reset; /* Unstall DPG */ WREG32_P(SOC15_REG_OFFSET(UVD, inst_idx, mmUVD_POWER_STATUS), 0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); @@ -972,6 +999,7 @@ static int vcn_v2_5_start(struct amdgpu_device *adev) vcn_v2_5_mc_resume(adev); for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr; if (adev->vcn.harvest_config & (1 << i)) continue; /* VCN global tiling registers */ @@ -1045,6 +1073,7 @@ static int vcn_v2_5_start(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); WREG32_SOC15(UVD, i, mmUVD_RBC_RB_CNTL, tmp); + fw_shared->multi_queue.decode_queue_mode |= fw_queue_ring_reset; /* programm the RB_BASE for ring buffer */ WREG32_SOC15(UVD, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, lower_32_bits(ring->gpu_addr)); @@ -1057,19 +1086,25 @@ static int vcn_v2_5_start(struct amdgpu_device *adev) ring->wptr = RREG32_SOC15(UVD, i, mmUVD_RBC_RB_RPTR); WREG32_SOC15(UVD, i, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); + fw_shared->multi_queue.decode_queue_mode &= ~fw_queue_ring_reset; + + fw_shared->multi_queue.encode_generalpurpose_queue_mode |= fw_queue_ring_reset; ring = &adev->vcn.inst[i].ring_enc[0]; WREG32_SOC15(UVD, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, i, mmUVD_RB_BASE_LO, ring->gpu_addr); WREG32_SOC15(UVD, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, i, mmUVD_RB_SIZE, ring->ring_size / 4); + fw_shared->multi_queue.encode_generalpurpose_queue_mode &= ~fw_queue_ring_reset; + fw_shared->multi_queue.encode_lowlatency_queue_mode |= fw_queue_ring_reset; ring = &adev->vcn.inst[i].ring_enc[1]; WREG32_SOC15(UVD, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, i, mmUVD_RB_BASE_LO2, ring->gpu_addr); WREG32_SOC15(UVD, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, i, mmUVD_RB_SIZE2, ring->ring_size / 4); + fw_shared->multi_queue.encode_lowlatency_queue_mode &= ~fw_queue_ring_reset; } return 0; @@ -1390,6 +1425,8 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); if (!ret_code) { + volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr; + /* pause DPG */ reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; WREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE, reg_data); @@ -1405,6 +1442,7 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); /* Restore */ + fw_shared->multi_queue.encode_generalpurpose_queue_mode |= fw_queue_ring_reset; ring = &adev->vcn.inst[inst_idx].ring_enc[0]; ring->wptr = 0; WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_LO, ring->gpu_addr); @@ -1412,7 +1450,9 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev, WREG32_SOC15(UVD, inst_idx, mmUVD_RB_SIZE, ring->ring_size / 4); WREG32_SOC15(UVD, inst_idx, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + fw_shared->multi_queue.encode_generalpurpose_queue_mode &= ~fw_queue_ring_reset; + fw_shared->multi_queue.encode_lowlatency_queue_mode |= fw_queue_ring_reset; ring = &adev->vcn.inst[inst_idx].ring_enc[1]; ring->wptr = 0; WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_LO2, ring->gpu_addr); @@ -1420,9 +1460,12 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev, WREG32_SOC15(UVD, inst_idx, mmUVD_RB_SIZE2, ring->ring_size / 4); WREG32_SOC15(UVD, inst_idx, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + fw_shared->multi_queue.encode_lowlatency_queue_mode &= ~fw_queue_ring_reset; + fw_shared->multi_queue.decode_queue_mode |= fw_queue_ring_reset; WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR, RREG32_SOC15(UVD, inst_idx, mmUVD_SCRATCH2) & 0x7FFFFFFF); + fw_shared->multi_queue.decode_queue_mode &= ~fw_queue_ring_reset; /* Unstall DPG */ WREG32_P(SOC15_REG_OFFSET(UVD, inst_idx, mmUVD_POWER_STATUS), -- 2.7.4 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx