Only increment the power profile on the first submission.
Since the decrement may end up being pushed out as new
submissions come in, we only need to increment it once.

Fixes: 1443dd3c67f6 ("drm/amd/pm: fix and simplify workload handling")
Cc: Yang Wang <kevinyang.wang@xxxxxxx>
Cc: Kenneth Feng <kenneth.feng@xxxxxxx>
Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 28 ++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 984e6ff6e4632..90396aa8ec9f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -2142,12 +2142,25 @@ void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring)
 	amdgpu_gfx_kfd_sch_ctrl(adev, idx, true);
 }
 
+static unsigned int
+amdgpu_gfx_get_kernel_ring_fence_counts(struct amdgpu_device *adev)
+{
+	unsigned int i, fences = 0;
+
+	for (i = 0; i < AMDGPU_MAX_GFX_RINGS; ++i)
+		fences += amdgpu_fence_count_emitted(&adev->gfx.gfx_ring[i]);
+	for (i = 0; i < (AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES); ++i)
+		fences += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]);
+
+	return fences;
+}
+
 void amdgpu_gfx_profile_idle_work_handler(struct work_struct *work)
 {
 	struct amdgpu_device *adev =
 		container_of(work, struct amdgpu_device, gfx.idle_work.work);
 	enum PP_SMC_POWER_PROFILE profile;
-	u32 i, fences = 0;
+	unsigned int fences = 0;
 	int r;
 
 	if (adev->gfx.num_gfx_rings)
@@ -2155,10 +2168,8 @@ void amdgpu_gfx_profile_idle_work_handler(struct work_struct *work)
 	else
 		profile = PP_SMC_POWER_PROFILE_COMPUTE;
 
-	for (i = 0; i < AMDGPU_MAX_GFX_RINGS; ++i)
-		fences += amdgpu_fence_count_emitted(&adev->gfx.gfx_ring[i]);
-	for (i = 0; i < (AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES); ++i)
-		fences += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]);
+	fences = amdgpu_gfx_get_kernel_ring_fence_counts(adev);
+
 	if (!fences && !atomic_read(&adev->gfx.total_submission_cnt)) {
 		r = amdgpu_dpm_switch_power_profile(adev, profile, false);
 		if (r)
@@ -2174,6 +2185,7 @@ void amdgpu_gfx_profile_ring_begin_use(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
 	enum PP_SMC_POWER_PROFILE profile;
+	unsigned int fences = 0;
 	int r;
 
 	if (adev->gfx.num_gfx_rings)
@@ -2181,15 +2193,17 @@ void amdgpu_gfx_profile_ring_begin_use(struct amdgpu_ring *ring)
 	else
 		profile = PP_SMC_POWER_PROFILE_COMPUTE;
 
-	atomic_inc(&adev->gfx.total_submission_cnt);
+	fences = amdgpu_gfx_get_kernel_ring_fence_counts(adev);
 
-	if (!cancel_delayed_work_sync(&adev->gfx.idle_work)) {
+	if (!cancel_delayed_work_sync(&adev->gfx.idle_work) && !fences &&
+	    !atomic_read(&adev->gfx.total_submission_cnt)) {
 		r = amdgpu_dpm_switch_power_profile(adev, profile, true);
 		if (r)
 			dev_warn(adev->dev, "(%d) failed to disable %s power profile mode\n", r,
 				 profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ?
 				 "fullscreen 3D" : "compute");
 	}
+	atomic_inc(&adev->gfx.total_submission_cnt);
 }
 
 void amdgpu_gfx_profile_ring_end_use(struct amdgpu_ring *ring)
-- 
2.48.1