On 1/28/2022 2:22 PM, Lang Yu wrote:
On 01/28/ , Lazar, Lijo wrote:
On 1/28/2022 12:24 PM, Lang Yu wrote:
We observed a GPU hang when querying GMC CG state(i.e.,
cat amdgpu_pm_info) on cyan skillfish. Acctually, cyan
skillfish doesn't support any CG features.
Only allow ASICs which support GMC CG features accessing
related registers. As some ASICs support GMC CG but cg_flags
are not set. Use GC IP version instead of cg_flags to
determine whether GMC CG is supported or not.
v2:
- Use a function to encapsulate more functionality.(Christian)
- Use IP verion to determine whether CG is supported or not.(Lijo)
Signed-off-by: Lang Yu <Lang.Yu@xxxxxxx>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 10 ++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 1 +
drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 3 +++
drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 3 +++
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 3 +++
5 files changed, 20 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index d426de48d299..be1f03b02af6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -876,3 +876,13 @@ int amdgpu_gmc_vram_checking(struct amdgpu_device *adev)
return 0;
}
+
+bool amdgpu_gmc_cg_enabled(struct amdgpu_device *adev)
+{
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(10, 1, 3):
+ return false;
+ default:
+ return true;
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 93505bb0a36c..b916e73c7de1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -338,4 +338,5 @@ uint64_t amdgpu_gmc_vram_mc2pa(struct amdgpu_device *adev, uint64_t mc_addr);
uint64_t amdgpu_gmc_vram_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo);
uint64_t amdgpu_gmc_vram_cpu_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo);
int amdgpu_gmc_vram_checking(struct amdgpu_device *adev);
+bool amdgpu_gmc_cg_enabled(struct amdgpu_device *adev);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 73ab0eebe4e2..4e46f618d6c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -1156,6 +1156,9 @@ static void gmc_v10_0_get_clockgating_state(void *handle, u32 *flags)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ if (!amdgpu_gmc_cg_enabled(adev))
+ return;
+
I think Christian suggested amdgpu_gmc_cg_enabled function assuming it's a
common logic for all ASICs based on flags. Now that assumption has changed.
Now the logic is a specific IP version doesn't enable CG which is known
beforehand. So we could maintain the check in the specific IP version block
itself (gmc 10 in this example). No need to call another common function
which checks IP version again.
Thanks. You mean just like this?
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 73ab0eebe4e2..bddaf2417344 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -1156,6 +1156,9 @@ static void gmc_v10_0_get_clockgating_state(void *handle, u32 *flags)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 3))
*flags = 0;
Yes, add the above line also.
Thanks,
Lijo
+ return;
+
adev->mmhub.funcs->get_clockgating(adev, flags);
if (adev->ip_versions[ATHUB_HWIP][0] >= IP_VERSION(2, 1, 0))
Regards,
Lang
Thanks,
Lijo
adev->mmhub.funcs->get_clockgating(adev, flags);
if (adev->ip_versions[ATHUB_HWIP][0] >= IP_VERSION(2, 1, 0))
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index ca9841d5669f..ff9dff2a6cf1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -1695,6 +1695,9 @@ static void gmc_v8_0_get_clockgating_state(void *handle, u32 *flags)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int data;
+ if (!amdgpu_gmc_cg_enabled(adev))
+ return;
+
if (amdgpu_sriov_vf(adev))
*flags = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 4595027a8c63..faf017609dfe 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1952,6 +1952,9 @@ static void gmc_v9_0_get_clockgating_state(void *handle, u32 *flags)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ if (!amdgpu_gmc_cg_enabled(adev))
+ return;
+
adev->mmhub.funcs->get_clockgating(adev, flags);
athub_v1_0_get_clockgating(adev, flags);