On Thu, Jan 9, 2025 at 12:59 AM Lazar, Lijo <lijo.lazar@xxxxxxx> wrote: > > > > On 1/9/2025 4:26 AM, Alex Deucher wrote: > > Add helpers to switch the workload profile dynamically when > > commands are submitted. This allows us to switch to > > the FULLSCREEN3D or COMPUTE profile when work is submitted. > > Add a delayed work handler to delay switching out of the > > selected profile if additional work comes in. This works > > the same as the VIDEO profile for VCN. This lets us dynamically > > enable workload profiles on the fly and then move back > > to the default when there is no work. > > > > Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx> > > --- > > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 57 +++++++++++++++++++++++++ > > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 11 +++++ > > 2 files changed, 68 insertions(+) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > > index 6d5d81f0dc4e7..c542617121393 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > > @@ -2110,6 +2110,63 @@ void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring) > > mutex_unlock(&adev->enforce_isolation_mutex); > > } > > > > +void amdgpu_gfx_profile_idle_work_handler(struct work_struct *work) > > +{ > > + struct amdgpu_device *adev = > > + container_of(work, struct amdgpu_device, gfx.idle_work.work); > > + enum PP_SMC_POWER_PROFILE profile; > > + u32 i, fences = 0; > > + int r; > > + > > + if (adev->gfx.num_gfx_rings) > > + profile = PP_SMC_POWER_PROFILE_FULLSCREEN3D; > > + else > > + profile = PP_SMC_POWER_PROFILE_COMPUTE; > > Since profile selection is in generic code, it makes sense to first > check if the profile is supported for the family. Otherwise, this needs > to be passed by the respective GFX family. The generic code already handles this. If you select an unsupported profile, it's ignored when the mask is updated. 
Alex > > Thanks, > Lijo > > > + > > + for (i = 0; i < AMDGPU_MAX_GFX_RINGS; ++i) > > + fences += amdgpu_fence_count_emitted(&adev->gfx.gfx_ring[i]); > > + for (i = 0; i < (AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES); ++i) > > + fences += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]); > > + if (!fences && !atomic_read(&adev->gfx.total_submission_cnt)) { > > + r = amdgpu_dpm_switch_power_profile(adev, profile, false); > > + if (r) > > + dev_warn(adev->dev, "(%d) failed to disable %s power profile mode\n", r, > > + profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ? > > + "fullscreen 3D" : "compute"); > > + } else { > > + schedule_delayed_work(&adev->gfx.idle_work, GFX_PROFILE_IDLE_TIMEOUT); > > + } > > +} > > + > > +void amdgpu_gfx_profile_ring_begin_use(struct amdgpu_ring *ring) > > +{ > > + struct amdgpu_device *adev = ring->adev; > > + enum PP_SMC_POWER_PROFILE profile; > > + int r; > > + > > + if (adev->gfx.num_gfx_rings) > > + profile = PP_SMC_POWER_PROFILE_FULLSCREEN3D; > > + else > > + profile = PP_SMC_POWER_PROFILE_COMPUTE; > > + > > + atomic_inc(&adev->gfx.total_submission_cnt); > > + > > + if (!cancel_delayed_work_sync(&adev->gfx.idle_work)) { > > + r = amdgpu_dpm_switch_power_profile(adev, profile, true); > > + if (r) > > + dev_warn(adev->dev, "(%d) failed to enable %s power profile mode\n", r, > > + profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ? > > + "fullscreen 3D" : "compute"); > > + } > > +} > > + > > +void amdgpu_gfx_profile_ring_end_use(struct amdgpu_ring *ring) > > +{ > > + atomic_dec(&ring->adev->gfx.total_submission_cnt); > > + > > + schedule_delayed_work(&ring->adev->gfx.idle_work, GFX_PROFILE_IDLE_TIMEOUT); > > +} > > + > > /* > > * debugfs for to enable/disable gfx job submission to specific core. 
> > */ > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > > index 7f9e261f47f11..6c84598caec21 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > > @@ -57,6 +57,9 @@ enum amdgpu_gfx_pipe_priority { > > #define AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM 0 > > #define AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM 15 > > > > +/* 1 second timeout */ > > +#define GFX_PROFILE_IDLE_TIMEOUT msecs_to_jiffies(1000) > > + > > enum amdgpu_gfx_partition { > > AMDGPU_SPX_PARTITION_MODE = 0, > > AMDGPU_DPX_PARTITION_MODE = 1, > > @@ -477,6 +480,9 @@ struct amdgpu_gfx { > > bool kfd_sch_inactive[MAX_XCP]; > > unsigned long enforce_isolation_jiffies[MAX_XCP]; > > unsigned long enforce_isolation_time[MAX_XCP]; > > + > > + atomic_t total_submission_cnt; > > + struct delayed_work idle_work; > > }; > > > > struct amdgpu_gfx_ras_reg_entry { > > @@ -585,6 +591,11 @@ void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev, > > void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work); > > void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring); > > void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring); > > + > > +void amdgpu_gfx_profile_idle_work_handler(struct work_struct *work); > > +void amdgpu_gfx_profile_ring_begin_use(struct amdgpu_ring *ring); > > +void amdgpu_gfx_profile_ring_end_use(struct amdgpu_ring *ring); > > + > > void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev); > > void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev); > > >