Re: [PATCH V2] drm/amd/pm: fix the deadlock issue observed on SI

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 





On 4/22/2022 1:57 PM, Quan, Evan wrote:
[AMD Official Use Only]



-----Original Message-----
From: Lazar, Lijo <Lijo.Lazar@xxxxxxx>
Sent: Monday, April 11, 2022 6:06 PM
To: Quan, Evan <Evan.Quan@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx
Cc: Deucher, Alexander <Alexander.Deucher@xxxxxxx>;
pmenzel@xxxxxxxxxxxxx; arthur.marsh@xxxxxxxxxxxxxxxx
Subject: Re: [PATCH V2] drm/amd/pm: fix the deadlock issue observed on SI



On 4/11/2022 2:24 PM, Evan Quan wrote:
The adev->pm.mutx is already held at the beginning of

amdgpu_dpm_compute_clocks/amdgpu_dpm_enable_uvd/amdgpu_dpm_
enable_vce.
But on their calling path, amdgpu_display_bandwidth_update will be
called and thus its sub functions amdgpu_dpm_get_sclk/mclk. They will
then try to acquire the same adev->pm.mutex and deadlock will occur.


What about using locked versions of get_sclk/mclk and leave the rest as they
are?
[Quan, Evan] Yeah, that(adding two new APIs for locked versions of get_sclk/mclk) should also work. But considering there cannot be other ASICs who need them, they are kind of redundant.
Meanwhile my version is just to get everything reverted back to its original. So, it should be quite safe. How do you think?


As far as I see -

1) It moves display watermark updates outside of locking
2) It adds bandwidth update under common compute_clocks paths.

I guess it is not the same as pre-mutex change.

Thanks,
Lijo

BR
Evan

Thanks,
Lijo

By placing amdgpu_display_bandwidth_update outside of adev-
pm.mutex
protection(considering logically they do not need such protection) and
restructuring the call flow accordingly, we can eliminate the deadlock
issue. This comes with no real logics change.

Fixes: 3712e7a49459 ("drm/amd/pm: unified lock protections in
amdgpu_dpm.c")
Reported-by: Paul Menzel <pmenzel@xxxxxxxxxxxxx>
Reported-by: Arthur Marsh <arthur.marsh@xxxxxxxxxxxxxxxx>
Link: https:
//lore.kernel.org/all/9e689fea-6c69-f4b0-8dee-32c4cf7d8f9c@xxxxxxxxxx.
de/
BugLink: https: //gitlab.freedesktop.org/drm/amd/-/issues/1957
Signed-off-by: Evan Quan <evan.quan@xxxxxxx>
--
v1->v2:
    - rich the commit messages(Paul)
---
   drivers/gpu/drm/amd/pm/amdgpu_dpm.c           | 39
+++++++++++++++++++
   .../gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c    | 10 -----
   drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c    | 35 -----------------
   .../gpu/drm/amd/pm/powerplay/amd_powerplay.c  | 10 -----
   4 files changed, 39 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
index 5504d81c77b7..72e7b5d40af6 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
@@ -427,6 +427,7 @@ int amdgpu_dpm_read_sensor(struct
amdgpu_device *adev, enum amd_pp_sensors senso
   void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev)
   {
   	const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
+	int i;

   	if (!adev->pm.dpm_enabled)
   		return;
@@ -434,6 +435,15 @@ void amdgpu_dpm_compute_clocks(struct
amdgpu_device *adev)
   	if (!pp_funcs->pm_compute_clocks)
   		return;

+	if (adev->mode_info.num_crtc)
+		amdgpu_display_bandwidth_update(adev);
+
+	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
+		struct amdgpu_ring *ring = adev->rings[i];
+		if (ring && ring->sched.ready)
+			amdgpu_fence_wait_empty(ring);
+	}
+
   	mutex_lock(&adev->pm.mutex);
   	pp_funcs->pm_compute_clocks(adev->powerplay.pp_handle);
   	mutex_unlock(&adev->pm.mutex);
@@ -443,6 +453,20 @@ void amdgpu_dpm_enable_uvd(struct
amdgpu_device *adev, bool enable)
   {
   	int ret = 0;

+	if (adev->family == AMDGPU_FAMILY_SI) {
+		mutex_lock(&adev->pm.mutex);
+		if (enable) {
+			adev->pm.dpm.uvd_active = true;
+			adev->pm.dpm.state =
POWER_STATE_TYPE_INTERNAL_UVD;
+		} else {
+			adev->pm.dpm.uvd_active = false;
+		}
+		mutex_unlock(&adev->pm.mutex);
+
+		amdgpu_dpm_compute_clocks(adev);
+		return;
+	}
+
   	ret = amdgpu_dpm_set_powergating_by_smu(adev,
AMD_IP_BLOCK_TYPE_UVD, !enable);
   	if (ret)
   		DRM_ERROR("Dpm %s uvd failed, ret = %d. \n", @@ -453,6
+477,21 @@
void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable)
   {
   	int ret = 0;

+	if (adev->family == AMDGPU_FAMILY_SI) {
+		mutex_lock(&adev->pm.mutex);
+		if (enable) {
+			adev->pm.dpm.vce_active = true;
+			/* XXX select vce level based on ring/task */
+			adev->pm.dpm.vce_level =
AMD_VCE_LEVEL_AC_ALL;
+		} else {
+			adev->pm.dpm.vce_active = false;
+		}
+		mutex_unlock(&adev->pm.mutex);
+
+		amdgpu_dpm_compute_clocks(adev);
+		return;
+	}
+
   	ret = amdgpu_dpm_set_powergating_by_smu(adev,
AMD_IP_BLOCK_TYPE_VCE, !enable);
   	if (ret)
   		DRM_ERROR("Dpm %s vce failed, ret = %d. \n", diff --git
a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
index 9613c6181c17..d3fe149d8476 100644
--- a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
+++ b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
@@ -1028,16 +1028,6 @@ static int
amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev)
   void amdgpu_legacy_dpm_compute_clocks(void *handle)
   {
   	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	int i = 0;
-
-	if (adev->mode_info.num_crtc)
-		amdgpu_display_bandwidth_update(adev);
-
-	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
-		struct amdgpu_ring *ring = adev->rings[i];
-		if (ring && ring->sched.ready)
-			amdgpu_fence_wait_empty(ring);
-	}

   	amdgpu_dpm_get_active_displays(adev);

diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
index caae54487f9c..633dab14f51c 100644
--- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
+++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
@@ -3892,40 +3892,6 @@ static int si_set_boot_state(struct
amdgpu_device *adev)
   }
   #endif

-static int si_set_powergating_by_smu(void *handle,
-				     uint32_t block_type,
-				     bool gate)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	switch (block_type) {
-	case AMD_IP_BLOCK_TYPE_UVD:
-		if (!gate) {
-			adev->pm.dpm.uvd_active = true;
-			adev->pm.dpm.state =
POWER_STATE_TYPE_INTERNAL_UVD;
-		} else {
-			adev->pm.dpm.uvd_active = false;
-		}
-
-		amdgpu_legacy_dpm_compute_clocks(handle);
-		break;
-	case AMD_IP_BLOCK_TYPE_VCE:
-		if (!gate) {
-			adev->pm.dpm.vce_active = true;
-			/* XXX select vce level based on ring/task */
-			adev->pm.dpm.vce_level =
AMD_VCE_LEVEL_AC_ALL;
-		} else {
-			adev->pm.dpm.vce_active = false;
-		}
-
-		amdgpu_legacy_dpm_compute_clocks(handle);
-		break;
-	default:
-		break;
-	}
-	return 0;
-}
-
   static int si_set_sw_state(struct amdgpu_device *adev)
   {
   	return (amdgpu_si_send_msg_to_smc(adev,
PPSMC_MSG_SwitchToSwState) == PPSMC_Result_OK) ?
@@ -8125,7 +8091,6 @@ static const struct amd_pm_funcs si_dpm_funcs
= {
   	.print_power_state = &si_dpm_print_power_state,
   	.debugfs_print_current_performance_level =
&si_dpm_debugfs_print_current_performance_level,
   	.force_performance_level = &si_dpm_force_performance_level,
-	.set_powergating_by_smu = &si_set_powergating_by_smu,
   	.vblank_too_short = &si_dpm_vblank_too_short,
   	.set_fan_control_mode = &si_dpm_set_fan_control_mode,
   	.get_fan_control_mode = &si_dpm_get_fan_control_mode, diff --
git
a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
index dbed72c1e0c6..1eb4e613b27a 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
@@ -1503,16 +1503,6 @@ static void pp_pm_compute_clocks(void
*handle)
   {
   	struct pp_hwmgr *hwmgr = handle;
   	struct amdgpu_device *adev = hwmgr->adev;
-	int i = 0;
-
-	if (adev->mode_info.num_crtc)
-		amdgpu_display_bandwidth_update(adev);
-
-	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
-		struct amdgpu_ring *ring = adev->rings[i];
-		if (ring && ring->sched.ready)
-			amdgpu_fence_wait_empty(ring);
-	}

   	if (!amdgpu_device_has_dc_support(adev)) {
   		amdgpu_dpm_get_active_displays(adev);




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux