[AMD Official Use Only - General]

Series is

Reviewed-by: Hawking Zhang <Hawking.Zhang@xxxxxxx>

Regards,
Hawking

-----Original Message-----
From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of Yang Wang
Sent: Wednesday, January 24, 2024 13:59
To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx
Cc: Zhou1, Tao <Tao.Zhou1@xxxxxxx>; Wang, Yang(Kevin) <KevinYang.Wang@xxxxxxx>; Zhang, Hawking <Hawking.Zhang@xxxxxxx>
Subject: [PATCH 2/2] drm/amdgpu: adjust aca init/fini sequence to match gpu reset

- move aca init/fini function into ras init/fini to adapt gpu reset sequence.
- add new function amdgpu_aca_reset()

Signed-off-by: Yang Wang <kevinyang.wang@xxxxxxx>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c    |  7 +++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h    |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  6 ------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c    | 15 +++++++++++++--
 4 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
index 52a0ea2f0ebf..40c1d5c4a9d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
@@ -688,6 +688,13 @@ void amdgpu_aca_fini(struct amdgpu_device *adev)
 	aca_manager_fini(&aca->mgr);
 }
 
+int amdgpu_aca_reset(struct amdgpu_device *adev)
+{
+	amdgpu_aca_fini(adev);
+
+	return amdgpu_aca_init(adev);
+}
+
 void amdgpu_aca_set_smu_funcs(struct amdgpu_device *adev, const struct aca_smu_funcs *smu_funcs)
 {
 	struct amdgpu_aca *aca = &adev->aca;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
index 6e9a35eda683..2da50e095883 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
@@ -185,6 +185,7 @@ struct aca_info {
 
 int amdgpu_aca_init(struct amdgpu_device *adev);
 void amdgpu_aca_fini(struct amdgpu_device *adev);
+int amdgpu_aca_reset(struct amdgpu_device *adev);
 void amdgpu_aca_set_smu_funcs(struct amdgpu_device *adev, const struct
aca_smu_funcs *smu_funcs);
 bool amdgpu_aca_is_enabled(struct amdgpu_device *adev);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 56d9dfa61290..dac73f8fbda4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4039,10 +4039,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 
 	amdgpu_device_get_pcie_info(adev);
 
-	r = amdgpu_aca_init(adev);
-	if (r)
-		return r;
-
 	r = amdgpu_device_get_job_timeout_settings(adev);
 	if (r) {
 		dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
@@ -4437,8 +4433,6 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
 
 	amdgpu_reset_fini(adev);
 
-	amdgpu_aca_fini(adev);
-
 	/* free i2c buses */
 	if (!amdgpu_device_has_dc_support(adev))
 		amdgpu_i2c_fini(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 5b519dc4df01..f7c6ea60316d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -3348,10 +3348,18 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev)
 	if (amdgpu_sriov_vf(adev))
 		return 0;
 
-	if (amdgpu_aca_is_enabled(adev))
+	if (amdgpu_aca_is_enabled(adev)) {
+		if (amdgpu_in_reset(adev))
+			r = amdgpu_aca_reset(adev);
+		else
+			r = amdgpu_aca_init(adev);
+		if (r)
+			return r;
+
 		amdgpu_ras_set_aca_debug_mode(adev, false);
-	else
+	} else {
 		amdgpu_ras_set_mca_debug_mode(adev, false);
+	}
 
 	list_for_each_entry_safe(node, tmp, &adev->ras_list, node) {
 		obj = node->ras_obj;
@@ -3420,6 +3428,9 @@ int amdgpu_ras_fini(struct amdgpu_device *adev)
 	amdgpu_ras_fs_fini(adev);
 	amdgpu_ras_interrupt_remove_all(adev);
 
+	if (amdgpu_aca_is_enabled(adev))
+		amdgpu_aca_fini(adev);
+
 	WARN(AMDGPU_RAS_GET_FEATURES(con->features), "Feature mask is not cleared");
 
 	if (AMDGPU_RAS_GET_FEATURES(con->features))
--
2.34.1