On 11/14/2024 3:14 AM, Alex Deucher wrote:
> Have a separate work handler for each VCN instance. This
> paves the way for per instance VCN power gating at runtime.
>
> Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 83 +++++++++++++------------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h |  8 ++-
>  drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c   | 17 ++---
>  drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c   |  2 +-
>  drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c   |  4 +-
>  drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c   |  4 +-
>  drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c   |  5 +-
>  drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c |  4 +-
>  drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c |  5 +-
>  drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c |  5 +-
>  10 files changed, 75 insertions(+), 62 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> index b4a550795470..3383e4146c6a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> @@ -118,12 +118,15 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
>          unsigned int fw_shared_size, log_offset;
>          int i, r;
>
> -        INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);
> -        mutex_init(&adev->vcn.vcn_pg_lock);
>          mutex_init(&adev->vcn.vcn1_jpeg1_workaround);
> -        atomic_set(&adev->vcn.total_submission_cnt, 0);
> -        for (i = 0; i < adev->vcn.num_vcn_inst; i++)
> +        for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
> +                mutex_init(&adev->vcn.inst[i].vcn_pg_lock);
> +                adev->vcn.inst[i].adev = adev;
> +                adev->vcn.inst[i].inst = i;
> +                atomic_set(&adev->vcn.inst[i].total_submission_cnt, 0);
> +                INIT_DELAYED_WORK(&adev->vcn.inst[i].idle_work, amdgpu_vcn_idle_work_handler);
>                  atomic_set(&adev->vcn.inst[i].dpg_enc_submission_cnt, 0);
> +        }
>
>          if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
>              (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
> @@ -271,10 +274,10 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
>                          amdgpu_ring_fini(&adev->vcn.inst[j].ring_enc[i]);
>
>                  amdgpu_ucode_release(&adev->vcn.inst[j].fw);
> +                mutex_destroy(&adev->vcn.inst[j].vcn_pg_lock);
>          }
>
>          mutex_destroy(&adev->vcn.vcn1_jpeg1_workaround);
> -        mutex_destroy(&adev->vcn.vcn_pg_lock);
>
>          return 0;
>  }
> @@ -325,8 +328,10 @@ int amdgpu_vcn_save_vcpu_bo(struct amdgpu_device *adev)
>  int amdgpu_vcn_suspend(struct amdgpu_device *adev)
>  {
>          bool in_ras_intr = amdgpu_ras_intr_triggered();
> +        int i;
>
> -        cancel_delayed_work_sync(&adev->vcn.idle_work);
> +        for (i = 0; i < adev->vcn.num_vcn_inst; ++i)
> +                cancel_delayed_work_sync(&adev->vcn.inst[i].idle_work);
>
>          /* err_event_athub will corrupt VCPU buffer, so we need to
>           * restore fw data and clear buffer in amdgpu_vcn_resume() */
> @@ -382,46 +387,45 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev)
>
>  static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
>  {
> -        struct amdgpu_device *adev =
> -                container_of(work, struct amdgpu_device, vcn.idle_work.work);
> +        struct amdgpu_vcn_inst *vcn_inst =
> +                container_of(work, struct amdgpu_vcn_inst, idle_work.work);
> +        struct amdgpu_device *adev = vcn_inst->adev;
>          unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0};
> -        unsigned int i, j;
> +        unsigned int i = vcn_inst->inst, j;
>          int r = 0;
>
> -        for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
> -                if (adev->vcn.harvest_config & (1 << j))
> -                        continue;
> -
> -                for (i = 0; i < adev->vcn.num_enc_rings; ++i)
> -                        fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]);
> +        if (adev->vcn.harvest_config & (1 << i))
> +                return;

This check is no longer required: the idle work is now per instance and is only scheduled from begin_use/end_use on that instance's rings, so it never runs for a harvested instance.

Also, fence[AMDGPU_MAX_VCN_INSTANCES] may be avoided. Since the handler now only looks at a single instance, a plain fences counter is enough to count the total outstanding fences on its enc/dec rings.
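Something like the below is what I have in mind. A rough, untested sketch only; all names are taken from your patch, with just the harvest check and the fence[] array dropped, everything else unchanged:

static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
{
        struct amdgpu_vcn_inst *vcn_inst =
                container_of(work, struct amdgpu_vcn_inst, idle_work.work);
        struct amdgpu_device *adev = vcn_inst->adev;
        unsigned int fences = 0, i;
        int r = 0;

        /* count outstanding fences on this instance's enc rings */
        for (i = 0; i < adev->vcn.num_enc_rings; ++i)
                fences += amdgpu_fence_count_emitted(&vcn_inst->ring_enc[i]);

        /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
        if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
            !adev->vcn.using_unified_queue) {
                struct dpg_pause_state new_state;

                if (fences ||
                    unlikely(atomic_read(&vcn_inst->dpg_enc_submission_cnt)))
                        new_state.fw_based = VCN_DPG_STATE__PAUSE;
                else
                        new_state.fw_based = VCN_DPG_STATE__UNPAUSE;

                adev->vcn.pause_dpg_mode(adev, vcn_inst->inst, &new_state);
        }

        /* ... and on the dec ring */
        fences += amdgpu_fence_count_emitted(&vcn_inst->ring_dec);

        if (!fences && !atomic_read(&vcn_inst->total_submission_cnt)) {
                amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
                                                       AMD_PG_STATE_GATE);
                r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
                                                    false);
                if (r)
                        dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r);
        } else {
                schedule_delayed_work(&vcn_inst->idle_work, VCN_IDLE_TIMEOUT);
        }
}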
Apart from that, looks fine.

Thanks,
Lijo

>
> -        /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
> -        if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
> -            !adev->vcn.using_unified_queue) {
> -                struct dpg_pause_state new_state;
> +        for (j = 0; j < adev->vcn.num_enc_rings; ++j)
> +                fence[i] += amdgpu_fence_count_emitted(&vcn_inst->ring_enc[j]);
>
> -                if (fence[j] ||
> -                    unlikely(atomic_read(&adev->vcn.inst[j].dpg_enc_submission_cnt)))
> -                        new_state.fw_based = VCN_DPG_STATE__PAUSE;
> -                else
> -                        new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
> +        /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
> +        if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
> +            !adev->vcn.using_unified_queue) {
> +                struct dpg_pause_state new_state;
>
> -                adev->vcn.pause_dpg_mode(adev, j, &new_state);
> -        }
> +                if (fence[i] ||
> +                    unlikely(atomic_read(&vcn_inst->dpg_enc_submission_cnt)))
> +                        new_state.fw_based = VCN_DPG_STATE__PAUSE;
> +                else
> +                        new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
>
> -                fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_dec);
> -                fences += fence[j];
> +                adev->vcn.pause_dpg_mode(adev, i, &new_state);
>          }
>
> -        if (!fences && !atomic_read(&adev->vcn.total_submission_cnt)) {
> +        fence[i] += amdgpu_fence_count_emitted(&vcn_inst->ring_dec);
> +        fences += fence[i];
> +
> +        if (!fences && !atomic_read(&vcn_inst->total_submission_cnt)) {
>                  amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
> -                        AMD_PG_STATE_GATE);
> +                                                       AMD_PG_STATE_GATE);
>                  r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
> -                        false);
> +                                                    false);
>                  if (r)
>                          dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r);
>          } else {
> -                schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
> +                schedule_delayed_work(&vcn_inst->idle_work, VCN_IDLE_TIMEOUT);
>          }
>  }
>
> @@ -430,18 +434,18 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
>          struct amdgpu_device *adev = ring->adev;
>          int r = 0;
>
> -        atomic_inc(&adev->vcn.total_submission_cnt);
> +        atomic_inc(&adev->vcn.inst[ring->me].total_submission_cnt);
>
> -        if (!cancel_delayed_work_sync(&adev->vcn.idle_work)) {
> +        if (!cancel_delayed_work_sync(&adev->vcn.inst[ring->me].idle_work)) {
>                  r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
>                                                      true);
>                  if (r)
>                          dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r);
>          }
>
> -        mutex_lock(&adev->vcn.vcn_pg_lock);
> +        mutex_lock(&adev->vcn.inst[ring->me].vcn_pg_lock);
>          amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
> -                AMD_PG_STATE_UNGATE);
> +                                               AMD_PG_STATE_UNGATE);
>
>          /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
>          if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
> @@ -466,7 +470,7 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
>
>                  adev->vcn.pause_dpg_mode(adev, ring->me, &new_state);
>          }
> -        mutex_unlock(&adev->vcn.vcn_pg_lock);
> +        mutex_unlock(&adev->vcn.inst[ring->me].vcn_pg_lock);
>  }
>
>  void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
> @@ -479,9 +483,10 @@ void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
>              !adev->vcn.using_unified_queue)
>                  atomic_dec(&ring->adev->vcn.inst[ring->me].dpg_enc_submission_cnt);
>
> -        atomic_dec(&ring->adev->vcn.total_submission_cnt);
> +        atomic_dec(&ring->adev->vcn.inst[ring->me].total_submission_cnt);
>
> -        schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
> +        schedule_delayed_work(&ring->adev->vcn.inst[ring->me].idle_work,
> +                              VCN_IDLE_TIMEOUT);
>  }
>
>  int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
> index 7b528123b36e..0d9a4d946eac 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
> @@ -279,6 +279,8 @@ struct amdgpu_vcn_fw_shared {
>  };
>
>  struct amdgpu_vcn_inst {
> +        struct amdgpu_device *adev;
> +        int inst;
>          struct amdgpu_bo *vcpu_bo;
>          void *cpu_addr;
>          uint64_t gpu_addr;
> @@ -300,6 +302,9 @@ struct amdgpu_vcn_inst {
>          const struct firmware *fw; /* VCN firmware */
>          uint8_t vcn_config;
>          uint32_t vcn_codec_disable_mask;
> +        atomic_t total_submission_cnt;
> +        struct mutex vcn_pg_lock;
> +        struct delayed_work idle_work;
>  };
>
>  struct amdgpu_vcn_ras {
> @@ -308,7 +313,6 @@ struct amdgpu_vcn_ras {
>
>  struct amdgpu_vcn {
>          unsigned fw_version;
> -        struct delayed_work idle_work;
>          unsigned num_enc_rings;
>          enum amd_powergating_state cur_state;
>          bool indirect_sram;
> @@ -316,9 +320,7 @@
>          uint8_t num_vcn_inst;
>          struct amdgpu_vcn_inst inst[AMDGPU_MAX_VCN_INSTANCES];
>          struct amdgpu_vcn_reg internal;
> -        struct mutex vcn_pg_lock;
>          struct mutex vcn1_jpeg1_workaround;
> -        atomic_t total_submission_cnt;
>
>          unsigned harvest_config;
>          int (*pause_dpg_mode)(struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
> index 5ea96c983517..9ca964e11540 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
> @@ -150,7 +150,7 @@ static int vcn_v1_0_sw_init(struct amdgpu_ip_block *ip_block)
>                  return r;
>
>          /* Override the work func */
> -        adev->vcn.idle_work.work.func = vcn_v1_0_idle_work_handler;
> +        adev->vcn.inst->idle_work.work.func = vcn_v1_0_idle_work_handler;
>
>          amdgpu_vcn_setup_ucode(adev);
>
> @@ -277,7 +277,7 @@ static int vcn_v1_0_hw_fini(struct amdgpu_ip_block *ip_block)
>  {
>          struct amdgpu_device *adev = ip_block->adev;
>
> -        cancel_delayed_work_sync(&adev->vcn.idle_work);
> +        cancel_delayed_work_sync(&adev->vcn.inst->idle_work);
>
>          if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
>              (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
> @@ -301,7 +301,7 @@ static int vcn_v1_0_suspend(struct amdgpu_ip_block *ip_block)
>          struct amdgpu_device *adev = ip_block->adev;
>          bool idle_work_unexecuted;
>
> -        idle_work_unexecuted = cancel_delayed_work_sync(&adev->vcn.idle_work);
> +        idle_work_unexecuted = cancel_delayed_work_sync(&adev->vcn.inst->idle_work);
>          if (idle_work_unexecuted) {
>                  if (adev->pm.dpm_enabled)
>                          amdgpu_dpm_enable_vcn(adev, false, 0);
> @@ -1828,8 +1828,9 @@ static int vcn_v1_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
>
>  static void vcn_v1_0_idle_work_handler(struct work_struct *work)
>  {
> -        struct amdgpu_device *adev =
> -                container_of(work, struct amdgpu_device, vcn.idle_work.work);
> +        struct amdgpu_vcn_inst *vcn_inst =
> +                container_of(work, struct amdgpu_vcn_inst, idle_work.work);
> +        struct amdgpu_device *adev = vcn_inst->adev;
>          unsigned int fences = 0, i;
>
>          for (i = 0; i < adev->vcn.num_enc_rings; ++i)
> @@ -1862,14 +1863,14 @@ static void vcn_v1_0_idle_work_handler(struct work_struct *work)
>                  amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
>                                                         AMD_PG_STATE_GATE);
>          } else {
> -                schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
> +                schedule_delayed_work(&adev->vcn.inst->idle_work, VCN_IDLE_TIMEOUT);
>          }
>  }
>
>  static void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring)
>  {
>          struct amdgpu_device *adev = ring->adev;
> -        bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
> +        bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.inst->idle_work);
>
>          mutex_lock(&adev->vcn.vcn1_jpeg1_workaround);
>
> @@ -1921,7 +1922,7 @@ void vcn_v1_0_set_pg_for_begin_use(struct amdgpu_ring *ring, bool set_clocks)
>
>  void vcn_v1_0_ring_end_use(struct amdgpu_ring *ring)
>  {
> -        schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
> +        schedule_delayed_work(&ring->adev->vcn.inst->idle_work, VCN_IDLE_TIMEOUT);
>          mutex_unlock(&ring->adev->vcn.vcn1_jpeg1_workaround);
>  }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
> index e42cfc731ad8..115f33c3ab68 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
> @@ -313,7 +313,7 @@ static int vcn_v2_0_hw_fini(struct amdgpu_ip_block *ip_block)
>  {
>          struct amdgpu_device *adev = ip_block->adev;
>
> -        cancel_delayed_work_sync(&adev->vcn.idle_work);
> +        cancel_delayed_work_sync(&adev->vcn.inst->idle_work);
>
>          if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
>              (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
> index b9be304aa294..6fb08ed09310 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
> @@ -390,12 +390,12 @@ static int vcn_v2_5_hw_fini(struct amdgpu_ip_block *ip_block)
>          struct amdgpu_device *adev = ip_block->adev;
>          int i;
>
> -        cancel_delayed_work_sync(&adev->vcn.idle_work);
> -
>          for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
>                  if (adev->vcn.harvest_config & (1 << i))
>                          continue;
>
> +                cancel_delayed_work_sync(&adev->vcn.inst[i].idle_work);
> +
>                  if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
>                      (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
>                       RREG32_SOC15(VCN, i, mmUVD_STATUS)))
> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
> index a3627700ed48..70a1b85a4efa 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
> @@ -422,12 +422,12 @@ static int vcn_v3_0_hw_fini(struct amdgpu_ip_block *ip_block)
>          struct amdgpu_device *adev = ip_block->adev;
>          int i;
>
> -        cancel_delayed_work_sync(&adev->vcn.idle_work);
> -
>          for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
>                  if (adev->vcn.harvest_config & (1 << i))
>                          continue;
>
> +                cancel_delayed_work_sync(&adev->vcn.inst[i].idle_work);
> +
>                  if (!amdgpu_sriov_vf(adev)) {
>                          if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
>                              (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
> index e11bd6cae8fc..656c2a1c4bf9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
> @@ -359,11 +359,12 @@ static int vcn_v4_0_hw_fini(struct amdgpu_ip_block *ip_block)
>          struct amdgpu_device *adev = ip_block->adev;
>          int i;
>
> -        cancel_delayed_work_sync(&adev->vcn.idle_work);
> -
>          for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
>                  if (adev->vcn.harvest_config & (1 << i))
>                          continue;
> +
> +                cancel_delayed_work_sync(&adev->vcn.inst[i].idle_work);
> +
>                  if (!amdgpu_sriov_vf(adev)) {
>                          if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
>                              (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
> index d4d92d4e2cab..8eea78c498da 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
> @@ -331,8 +331,10 @@ static int vcn_v4_0_3_hw_init(struct amdgpu_ip_block *ip_block)
>  static int vcn_v4_0_3_hw_fini(struct amdgpu_ip_block *ip_block)
>  {
>          struct amdgpu_device *adev = ip_block->adev;
> +        int i;
>
> -        cancel_delayed_work_sync(&adev->vcn.idle_work);
> +        for (i = 0; i < adev->vcn.num_vcn_inst; ++i)
> +                cancel_delayed_work_sync(&adev->vcn.inst[i].idle_work);
>
>          if (adev->vcn.cur_state != AMD_PG_STATE_GATE)
>                  vcn_v4_0_3_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
> index 26bd1954b5e3..7086f98c2ddc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
> @@ -300,11 +300,12 @@ static int vcn_v4_0_5_hw_fini(struct amdgpu_ip_block *ip_block)
>          struct amdgpu_device *adev = ip_block->adev;
>          int i;
>
> -        cancel_delayed_work_sync(&adev->vcn.idle_work);
> -
>          for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
>                  if (adev->vcn.harvest_config & (1 << i))
>                          continue;
> +
> +                cancel_delayed_work_sync(&adev->vcn.inst[i].idle_work);
> +
>                  if (!amdgpu_sriov_vf(adev)) {
>                          if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
>                              (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
> index 95b31167f552..4c641813aeff 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
> @@ -274,11 +274,12 @@ static int vcn_v5_0_0_hw_fini(struct amdgpu_ip_block *ip_block)
>          struct amdgpu_device *adev = ip_block->adev;
>          int i;
>
> -        cancel_delayed_work_sync(&adev->vcn.idle_work);
> -
>          for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
>                  if (adev->vcn.harvest_config & (1 << i))
>                          continue;
> +
> +                cancel_delayed_work_sync(&adev->vcn.inst[i].idle_work);
> +
>                  if (!amdgpu_sriov_vf(adev)) {
>                          if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
>                              (adev->vcn.cur_state != AMD_PG_STATE_GATE &&