-----Original Message-----
From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of
Lazar, Lijo
Sent: Tuesday, November 30, 2021 4:10 PM
To: Quan, Evan <Evan.Quan@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx
Cc: Deucher, Alexander <Alexander.Deucher@xxxxxxx>; Feng, Kenneth
<Kenneth.Feng@xxxxxxx>; Koenig, Christian <Christian.Koenig@xxxxxxx>
Subject: Re: [PATCH V2 01/17] drm/amd/pm: do not expose implementation
details to other blocks out of power
On 11/30/2021 1:12 PM, Evan Quan wrote:
Those implementation details(whether swsmu supported, some ppt_funcs
supported, accessing internal statistics ...)should be kept
internally. It's not a good practice and even error prone to expose
implementation details.
Signed-off-by: Evan Quan <evan.quan@xxxxxxx>
Change-Id: Ibca3462ceaa26a27a9145282b60c6ce5deca7752
---
drivers/gpu/drm/amd/amdgpu/aldebaran.c | 2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 25 ++---
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 18 +---
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 7 --
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 5 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 5 +-
drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 2 +-
.../gpu/drm/amd/include/kgd_pp_interface.h | 4 +
drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 95
+++++++++++++++++++
drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 25 ++++-
drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h | 9 +-
drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 16 ++--
13 files changed, 155 insertions(+), 64 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c
b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
index bcfdb63b1d42..a545df4efce1 100644
--- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
@@ -260,7 +260,7 @@ static int aldebaran_mode2_restore_ip(struct
amdgpu_device *adev)
adev->gfx.rlc.funcs->resume(adev);
/* Wait for FW reset event complete */
- r = smu_wait_for_event(adev, SMU_EVENT_RESET_COMPLETE, 0);
+ r = amdgpu_dpm_wait_for_event(adev,
SMU_EVENT_RESET_COMPLETE, 0);
if (r) {
dev_err(adev->dev,
"Failed to get response from firmware after reset\n");
diff --git
a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 164d6a9e9fbb..0d1f00b24aae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1585,22 +1585,25 @@ static int amdgpu_debugfs_sclk_set(void *data,
u64 val)
return ret;
}
- if (is_support_sw_smu(adev)) {
- ret = smu_get_dpm_freq_range(&adev->smu, SMU_SCLK,
&min_freq, &max_freq);
- if (ret || val > max_freq || val < min_freq)
- return -EINVAL;
- ret = smu_set_soft_freq_range(&adev->smu, SMU_SCLK,
(uint32_t)val, (uint32_t)val);
- } else {
- return 0;
+ ret = amdgpu_dpm_get_dpm_freq_range(adev, PP_SCLK,
&min_freq, &max_freq);
+ if (ret == -EOPNOTSUPP) {
+ ret = 0;
+ goto out;
}
+ if (ret || val > max_freq || val < min_freq) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = amdgpu_dpm_set_soft_freq_range(adev, PP_SCLK,
(uint32_t)val, (uint32_t)val);
+ if (ret)
+ ret = -EINVAL;
+out:
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- if (ret)
- return -EINVAL;
-
- return 0;
+ return ret;
}
DEFINE_DEBUGFS_ATTRIBUTE(fops_ib_preempt, NULL, diff --git
a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1989f9e9379e..41cc1ffb5809 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2617,7 +2617,7 @@ static int amdgpu_device_ip_late_init(struct
amdgpu_device *adev)
if (adev->asic_type == CHIP_ARCTURUS &&
amdgpu_passthrough(adev) &&
adev->gmc.xgmi.num_physical_nodes > 1)
- smu_set_light_sbr(&adev->smu, true);
+ amdgpu_dpm_set_light_sbr(adev, true);
if (adev->gmc.xgmi.num_physical_nodes > 1) {
mutex_lock(&mgpu_info.mutex);
@@ -2857,7 +2857,7 @@ static int
amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
int i, r;
if (adev->in_s0ix)
- amdgpu_gfx_state_change_set(adev,
sGpuChangeState_D3Entry);
+ amdgpu_dpm_gfx_state_change(adev,
sGpuChangeState_D3Entry);
for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
if (!adev->ip_blocks[i].status.valid)
@@ -3982,7 +3982,7 @@ int amdgpu_device_resume(struct drm_device
*dev, bool fbcon)
return 0;
if (adev->in_s0ix)
- amdgpu_gfx_state_change_set(adev,
sGpuChangeState_D0Entry);
+ amdgpu_dpm_gfx_state_change(adev,
sGpuChangeState_D0Entry);
/* post card */
if (amdgpu_device_need_post(adev)) { diff --git
a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 1916ec84dd71..3d8f82dc8c97 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -615,7 +615,7 @@ int amdgpu_get_gfx_off_status(struct
amdgpu_device
*adev, uint32_t *value)
mutex_lock(&adev->gfx.gfx_off_mutex);
- r = smu_get_status_gfxoff(adev, value);
+ r = amdgpu_dpm_get_status_gfxoff(adev, value);
mutex_unlock(&adev->gfx.gfx_off_mutex);
@@ -852,19 +852,3 @@ int amdgpu_gfx_get_num_kcq(struct
amdgpu_device *adev)
}
return amdgpu_num_kcq;
}
-
-/* amdgpu_gfx_state_change_set - Handle gfx power state change set
- * @adev: amdgpu_device pointer
- * @state: gfx power state(1 -sGpuChangeState_D0Entry and 2
-sGpuChangeState_D3Entry)
- *
- */
-
-void amdgpu_gfx_state_change_set(struct amdgpu_device *adev, enum
gfx_change_state state) -{
- mutex_lock(&adev->pm.mutex);
- if (adev->powerplay.pp_funcs &&
- adev->powerplay.pp_funcs->gfx_state_change_set)
- ((adev)->powerplay.pp_funcs->gfx_state_change_set(
- (adev)->powerplay.pp_handle, state));
- mutex_unlock(&adev->pm.mutex);
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index f851196c83a5..776c886fd94a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -47,12 +47,6 @@ enum amdgpu_gfx_pipe_priority {
AMDGPU_GFX_PIPE_PRIO_HIGH = AMDGPU_RING_PRIO_2
};
-/* Argument for PPSMC_MSG_GpuChangeState */ -enum
gfx_change_state {
- sGpuChangeState_D0Entry = 1,
- sGpuChangeState_D3Entry,
-};
-
#define AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM 0
#define AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM 15
@@ -410,5 +404,4 @@ int amdgpu_gfx_cp_ecc_error_irq(struct
amdgpu_device *adev,
uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg);
void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg,
uint32_t v);
int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev); -void
amdgpu_gfx_state_change_set(struct amdgpu_device *adev, enum
gfx_change_state state);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 3c623e589b79..35c4aec04a7e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -901,7 +901,7 @@ static void amdgpu_ras_get_ecc_info(struct
amdgpu_device *adev, struct ras_err_d
* choosing right query method according to
* whether smu support query error information
*/
- ret = smu_get_ecc_info(&adev->smu, (void *)&(ras->umc_ecc));
+ ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(ras->umc_ecc));
if (ret == -EOPNOTSUPP) {
if (adev->umc.ras_funcs &&
adev->umc.ras_funcs->query_ras_error_count)
@@ -2132,8 +2132,7 @@ int amdgpu_ras_recovery_init(struct
amdgpu_device *adev)
if (ret)
goto free;
- if (adev->smu.ppt_funcs && adev->smu.ppt_funcs-
send_hbm_bad_pages_num)
- adev->smu.ppt_funcs-
send_hbm_bad_pages_num(&adev->smu, con-
eeprom_control.ras_num_recs);
+ amdgpu_dpm_send_hbm_bad_pages_num(adev,
+con->eeprom_control.ras_num_recs);
}
#ifdef CONFIG_X86_MCE_AMD
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index 6e4bea012ea4..5fed26c8db44 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -97,7 +97,7 @@ int amdgpu_umc_process_ras_data_cb(struct
amdgpu_device *adev,
int ret = 0;
kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
- ret = smu_get_ecc_info(&adev->smu, (void *)&(con->umc_ecc));
+ ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(con->umc_ecc));
if (ret == -EOPNOTSUPP) {
if (adev->umc.ras_funcs &&
adev->umc.ras_funcs->query_ras_error_count)
@@ -160,8 +160,7 @@ int amdgpu_umc_process_ras_data_cb(struct
amdgpu_device *adev,
err_data->err_addr_cnt);
amdgpu_ras_save_bad_pages(adev);
- if (adev->smu.ppt_funcs && adev->smu.ppt_funcs-
send_hbm_bad_pages_num)
- adev->smu.ppt_funcs-
send_hbm_bad_pages_num(&adev->smu, con-
eeprom_control.ras_num_recs);
+ amdgpu_dpm_send_hbm_bad_pages_num(adev,
+con->eeprom_control.ras_num_recs);
}
amdgpu_ras_reset_gpu(adev);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
index deae12dc777d..329a4c89f1e6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -222,7 +222,7 @@ void
kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
len = snprintf(fifo_in, sizeof(fifo_in), "%x %llx:%llx\n",
KFD_SMI_EVENT_THERMAL_THROTTLE, throttle_bitmask,
- atomic64_read(&dev->adev->smu.throttle_int_counter));
+ amdgpu_dpm_get_thermal_throttling_counter(dev-
adev));
add_event_to_kfifo(dev, KFD_SMI_EVENT_THERMAL_THROTTLE,
fifo_in, len);
}
diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index 5c0867ebcfce..2e295facd086 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -26,6 +26,10 @@
extern const struct amdgpu_ip_block_version pp_smu_ip_block;
+enum smu_event_type {
+ SMU_EVENT_RESET_COMPLETE = 0,
+};
+
struct amd_vce_state {
/* vce clocks */
u32 evclk;
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
index 08362d506534..9b332c8a0079 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
@@ -1614,3 +1614,98 @@ int amdgpu_pm_load_smu_firmware(struct
amdgpu_device *adev, uint32_t *smu_versio
return 0;
}
+
+int amdgpu_dpm_set_light_sbr(struct amdgpu_device *adev, bool
enable)
+{
+ return smu_set_light_sbr(&adev->smu, enable); }
+
+int amdgpu_dpm_send_hbm_bad_pages_num(struct amdgpu_device
*adev,
+uint32_t size) {
+ int ret = 0;
+
+ if (adev->smu.ppt_funcs && adev->smu.ppt_funcs-
send_hbm_bad_pages_num)
+ ret = adev->smu.ppt_funcs-
send_hbm_bad_pages_num(&adev->smu,
+size);
+
+ return ret;
+}
+
+int amdgpu_dpm_get_dpm_freq_range(struct amdgpu_device *adev,
+ enum pp_clock_type type,
+ uint32_t *min,
+ uint32_t *max)
+{
+ if (!is_support_sw_smu(adev))
+ return -EOPNOTSUPP;
+
+ switch (type) {
+ case PP_SCLK:
+ return smu_get_dpm_freq_range(&adev->smu, SMU_SCLK,
min, max);
+ default:
+ return -EINVAL;
+ }
+}
+
+int amdgpu_dpm_set_soft_freq_range(struct amdgpu_device *adev,
+ enum pp_clock_type type,
+ uint32_t min,
+ uint32_t max)
+{
+ if (!is_support_sw_smu(adev))
+ return -EOPNOTSUPP;
+
+ switch (type) {
+ case PP_SCLK:
+ return smu_set_soft_freq_range(&adev->smu, SMU_SCLK,
min, max);
+ default:
+ return -EINVAL;
+ }
+}
+
+int amdgpu_dpm_wait_for_event(struct amdgpu_device *adev,
+ enum smu_event_type event,
+ uint64_t event_arg)
+{
+ if (!is_support_sw_smu(adev))
+ return -EOPNOTSUPP;
+
+ return smu_wait_for_event(&adev->smu, event, event_arg); }
+
+int amdgpu_dpm_get_status_gfxoff(struct amdgpu_device *adev,
uint32_t
+*value) {
+ if (!is_support_sw_smu(adev))
+ return -EOPNOTSUPP;
+
+ return smu_get_status_gfxoff(&adev->smu, value); }
+
+uint64_t amdgpu_dpm_get_thermal_throttling_counter(struct
+amdgpu_device *adev) {
+ return atomic64_read(&adev->smu.throttle_int_counter);
+}
+
+/* amdgpu_dpm_gfx_state_change - Handle gfx power state change set
+ * @adev: amdgpu_device pointer
+ * @state: gfx power state(1 -sGpuChangeState_D0Entry and 2
+-sGpuChangeState_D3Entry)
+ *
+ */
+void amdgpu_dpm_gfx_state_change(struct amdgpu_device *adev,
+ enum gfx_change_state state)
+{
+ mutex_lock(&adev->pm.mutex);
+ if (adev->powerplay.pp_funcs &&
+ adev->powerplay.pp_funcs->gfx_state_change_set)
+ ((adev)->powerplay.pp_funcs->gfx_state_change_set(
+ (adev)->powerplay.pp_handle, state));
+ mutex_unlock(&adev->pm.mutex);
+}
+
+int amdgpu_dpm_get_ecc_info(struct amdgpu_device *adev,
+ void *umc_ecc)
+{
+ if (!is_support_sw_smu(adev))
+ return -EOPNOTSUPP;
+
In general, I don't think we need to keep this check everywhere to make
amdgpu_dpm_* backwards compatible. The usage is also inconsistent. For
ex: amdgpu_dpm_get_thermal_throttling_counter doesn't have any
is_support_sw_smu check whereas amdgpu_dpm_get_ecc_info() has it.
There is no reason to keep adding is_support_sw_smu() check for every new
public API. For sure, they are not going to work with powerplay subsystem.
I would rather prefer to leave old things and create amdgpu_smu_* for
anything which is supported only in smu subsystem. It's easier to read from
code perspective also - separate the ones which is supported by smu
component and not supported in older powerplay components.
Only for the common ones that are supported in powerplay and smu, keep
amdgpu_dpm_*, for others preference would be to keep amdgpu_smu_*.