[AMD Official Use Only - Internal Distribution Only]
Instead of mixing to recover workflow, can this be separated out to something like early_reset().
Lijo
From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> on behalf of shaoyunl <shaoyun.liu@xxxxxxx>
Sent: Friday, March 5, 2021 11:21:49 PM
To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx <amd-gfx@xxxxxxxxxxxxxxxxxxxxx>
Cc: Liu, Shaoyun <Shaoyun.Liu@xxxxxxx>
Subject: [PATCH 2/5] drm/amdgpu: Add kfd init_complete flag to check from amdgpu side
Sent: Friday, March 5, 2021 11:21:49 PM
To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx <amd-gfx@xxxxxxxxxxxxxxxxxxxxx>
Cc: Liu, Shaoyun <Shaoyun.Liu@xxxxxxx>
Subject: [PATCH 2/5] drm/amdgpu: Add kfd init_complete flag to check from amdgpu side
amdgpu driver may be in reset state during init which will not initialize the kfd,
driver need to initialize the KFD after reset by check the flag
Signed-off-by: shaoyunl <shaoyun.liu@xxxxxxx>
Acked-by: Alex Deucher <alexander.deucher@xxxxxxx>
Change-Id: Ic1684b55b27e0afd42bee8b9b431c4fb0afcec15
---
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 3 ++-
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 9 ++++++++-
3 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index c5343a5eecbe..a876dc3af017 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -165,7 +165,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
adev->doorbell_index.last_non_cp;
}
- kgd2kfd_device_init(adev->kfd.dev, adev_to_drm(adev), &gpu_resources);
+ adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,
+ adev_to_drm(adev), &gpu_resources);
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 4687ff2961e1..3182dd97840e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -80,6 +80,7 @@ struct amdgpu_amdkfd_fence {
struct amdgpu_kfd_dev {
struct kfd_dev *dev;
uint64_t vram_used;
+ bool init_complete;
};
enum kgd_engine_type {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index a11760ec3924..62d7ce621457 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4788,9 +4788,16 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
skip_sched_resume:
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
- /*unlock kfd: SRIOV would do it separately */
+ /* unlock kfd: SRIOV would do it separately */
if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
amdgpu_amdkfd_post_reset(tmp_adev);
+
+ /* kfd_post_reset will do nothing if kfd device is not initialized,
+ * need to bring up kfd here if it's not be initialized before
+ */
+ if (!adev->kfd.init_complete)
+ amdgpu_amdkfd_device_init(adev);
+
if (audio_suspended)
amdgpu_device_resume_display_audio(tmp_adev);
amdgpu_device_unlock_adev(tmp_adev);
--
2.17.1
_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://nam11.safelinks.protection.outlook.com/?url="">
driver need to initialize the KFD after reset by check the flag
Signed-off-by: shaoyunl <shaoyun.liu@xxxxxxx>
Acked-by: Alex Deucher <alexander.deucher@xxxxxxx>
Change-Id: Ic1684b55b27e0afd42bee8b9b431c4fb0afcec15
---
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 3 ++-
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 9 ++++++++-
3 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index c5343a5eecbe..a876dc3af017 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -165,7 +165,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
adev->doorbell_index.last_non_cp;
}
- kgd2kfd_device_init(adev->kfd.dev, adev_to_drm(adev), &gpu_resources);
+ adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,
+ adev_to_drm(adev), &gpu_resources);
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 4687ff2961e1..3182dd97840e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -80,6 +80,7 @@ struct amdgpu_amdkfd_fence {
struct amdgpu_kfd_dev {
struct kfd_dev *dev;
uint64_t vram_used;
+ bool init_complete;
};
enum kgd_engine_type {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index a11760ec3924..62d7ce621457 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4788,9 +4788,16 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
skip_sched_resume:
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
- /*unlock kfd: SRIOV would do it separately */
+ /* unlock kfd: SRIOV would do it separately */
if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
amdgpu_amdkfd_post_reset(tmp_adev);
+
+ /* kfd_post_reset will do nothing if kfd device is not initialized,
+ * need to bring up kfd here if it's not be initialized before
+ */
+ if (!adev->kfd.init_complete)
+ amdgpu_amdkfd_device_init(adev);
+
if (audio_suspended)
amdgpu_device_resume_display_audio(tmp_adev);
amdgpu_device_unlock_adev(tmp_adev);
--
2.17.1
_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://nam11.safelinks.protection.outlook.com/?url="">
_______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx