On Thu, Jan 9, 2025 at 12:59 AM Lazar, Lijo <lijo.lazar@xxxxxxx> wrote: > > > > On 1/9/2025 4:26 AM, Alex Deucher wrote: > > Add helpers to switch the workload profile dynamically when > > commands are submitted. This allows us to switch to > > the FULLSCREEN3D or COMPUTE profile when work is submitted. > > Add a delayed work handler to delay switching out of the > > selected profile if additional work comes in. This works > > the same as the VIDEO profile for VCN. This lets us dynamically > > enable workload profiles on the fly and then move back > > to the default when there is no work. > > > > Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx> > > --- > > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 57 +++++++++++++++++++++++++ > > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 11 +++++ > > 2 files changed, 68 insertions(+) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > > index 6d5d81f0dc4e7..c542617121393 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > > @@ -2110,6 +2110,63 @@ void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring) > > mutex_unlock(&adev->enforce_isolation_mutex); > > } > > > > +void amdgpu_gfx_profile_idle_work_handler(struct work_struct *work) > > +{ > > + struct amdgpu_device *adev = > > + container_of(work, struct amdgpu_device, gfx.idle_work.work); > > + enum PP_SMC_POWER_PROFILE profile; > > + u32 i, fences = 0; > > + int r; > > + > > + if (adev->gfx.num_gfx_rings) > > + profile = PP_SMC_POWER_PROFILE_FULLSCREEN3D; > > + else > > + profile = PP_SMC_POWER_PROFILE_COMPUTE; > > Since profile selection is in generic code, it makes sense to first > check if the profile is supported for the family. Otherwise, this needs > to be passed by the respective GFX family. The generic code already handles this. If you select an unsupported profile, it's ignored when the mask is updated. 
Alex > > Thanks, > Lijo > > > + > > + for (i = 0; i < AMDGPU_MAX_GFX_RINGS; ++i) > > + fences += amdgpu_fence_count_emitted(&adev->gfx.gfx_ring[i]); > > + for (i = 0; i < (AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES); ++i) > > + fences += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]); > > + if (!fences && !atomic_read(&adev->gfx.total_submission_cnt)) { > > + r = amdgpu_dpm_switch_power_profile(adev, profile, false); > > + if (r) > > + dev_warn(adev->dev, "(%d) failed to disable %s power profile mode\n", r, > > + profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ? > > + "fullscreen 3D" : "compute"); > > + } else { > > + schedule_delayed_work(&adev->gfx.idle_work, GFX_PROFILE_IDLE_TIMEOUT); > > + } > > +} > > + > > +void amdgpu_gfx_profile_ring_begin_use(struct amdgpu_ring *ring) > > +{ > > + struct amdgpu_device *adev = ring->adev; > > + enum PP_SMC_POWER_PROFILE profile; > > + int r; > > + > > + if (adev->gfx.num_gfx_rings) > > + profile = PP_SMC_POWER_PROFILE_FULLSCREEN3D; > > + else > > + profile = PP_SMC_POWER_PROFILE_COMPUTE; > > + > > + atomic_inc(&adev->gfx.total_submission_cnt); > > + > > + if (!cancel_delayed_work_sync(&adev->gfx.idle_work)) { > > + r = amdgpu_dpm_switch_power_profile(adev, profile, true); > > + if (r) > > + dev_warn(adev->dev, "(%d) failed to enable %s power profile mode\n", r, > > + profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ? > > + "fullscreen 3D" : "compute"); > > + } > > +} > > + > > +void amdgpu_gfx_profile_ring_end_use(struct amdgpu_ring *ring) > > +{ > > + atomic_dec(&ring->adev->gfx.total_submission_cnt); > > + > > + schedule_delayed_work(&ring->adev->gfx.idle_work, GFX_PROFILE_IDLE_TIMEOUT); > > +} > > + > > /* > > * debugfs for to enable/disable gfx job submission to specific core. 
> > */ > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > > index 7f9e261f47f11..6c84598caec21 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > > @@ -57,6 +57,9 @@ enum amdgpu_gfx_pipe_priority { > > #define AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM 0 > > #define AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM 15 > > > > +/* 1 second timeout */ > > +#define GFX_PROFILE_IDLE_TIMEOUT msecs_to_jiffies(1000) > > + > > enum amdgpu_gfx_partition { > > AMDGPU_SPX_PARTITION_MODE = 0, > > AMDGPU_DPX_PARTITION_MODE = 1, > > @@ -477,6 +480,9 @@ struct amdgpu_gfx { > > bool kfd_sch_inactive[MAX_XCP]; > > unsigned long enforce_isolation_jiffies[MAX_XCP]; > > unsigned long enforce_isolation_time[MAX_XCP]; > > + > > + atomic_t total_submission_cnt; > > + struct delayed_work idle_work; > > }; > > > > struct amdgpu_gfx_ras_reg_entry { > > @@ -585,6 +591,11 @@ void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev, > > void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work); > > void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring); > > void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring); > > + > > +void amdgpu_gfx_profile_idle_work_handler(struct work_struct *work); > > +void amdgpu_gfx_profile_ring_begin_use(struct amdgpu_ring *ring); > > +void amdgpu_gfx_profile_ring_end_use(struct amdgpu_ring *ring); > > + > > void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev); > > void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev); > > >