Re: 6.7/regression/KASAN: null-ptr-deref in amdgpu_ras_reset_error_count+0x2d6

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Nov 7, 2023 at 1:18 PM Mikhail Gavrilov
<mikhail.v.gavrilov@xxxxxxxxx> wrote:
>
> On Mon, Nov 6, 2023 at 8:29 PM Alex Deucher <alexdeucher@xxxxxxxxx> wrote:
> >
> > Already fixed in this commit:
> > https://gitlab.freedesktop.org/agd5f/linux/-/commit/d1d4c0b7b65b7fab2bc6f97af9e823b1c42ccdb0
> > Which is included in last week's PR.
> >
>
> Thanks, it fixed the issue above.
> But unfortunately, this is not the only problem I see on my laptop.
> Now I am observing 100% GPU load all the time,
> as shown in this screenshot: https://postimg.cc/QHLQncMg
>
> And another bisect round says that this commit is to blame:
> ❯ git bisect good
> de59b69932e64d77445d973a101d81d6e7e670c6 is the first bad commit
> commit de59b69932e64d77445d973a101d81d6e7e670c6
> Author: Alex Deucher <alexander.deucher@xxxxxxx>
> Date:   Wed Sep 20 13:27:58 2023 -0400
>
>     drm/amdgpu/gmc: set a default disable value for AGP
>
>     To disable AGP, the start needs to be set to a higher
>     value than the end.  Set a default disable value for
>     the AGP aperture and allow the IP specific GMC code
>     to enable it selectively by calling amdgpu_gmc_agp_location().
>
>     Reviewed-by: Christian König <christian.koenig@xxxxxxx>
>     Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx>
>
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c           | 27 ++++++++++++++++-------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h           |  2 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c        |  3 +++
>  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c            |  3 ++-
>  drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c            |  3 ++-
>  drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c             |  4 ++--
>  drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c             |  4 ++--
>  drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c             |  4 ++--
>  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c             |  3 ++-
>  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  2 +-
>  10 files changed, 37 insertions(+), 18 deletions(-)
>
> I checked twice and made sure that it does not happen on commit
> 29495d81457a483c2859ccde59cc063034bfe47d

The attached patch should fix it.  Not sure why your GPU shows up as
busy.  The AGP aperture was just disabled.

Alex
From 844d6d9098d65c2fd8e78741c79ffc2fb6e6c2e6 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@xxxxxxx>
Date: Tue, 7 Nov 2023 14:07:44 -0500
Subject: [PATCH] drm/amdgpu: fix AGP init order

The default AGP settings were overwriting the IP selected
ones since the default was getting set after the IP ones
were selected.

Fixes: de59b69932e6 ("drm/amdgpu/gmc: set a default disable value for AGP")
Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx>
Cc: Mikhail Gavrilov <mikhail.v.gavrilov@xxxxxxxxx>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 3 ---
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c     | 1 +
 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c     | 1 +
 drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c      | 1 +
 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c      | 1 +
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c      | 1 +
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c      | 2 ++
 7 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 0dcb6c36b02c..cef920a93924 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1062,9 +1062,6 @@ static const char * const amdgpu_vram_names[] = {
  */
 int amdgpu_bo_init(struct amdgpu_device *adev)
 {
-	/* set the default AGP aperture state */
-	amdgpu_gmc_set_agp_default(adev, &adev->gmc);
-
 	/* On A+A platform, VRAM can be mapped as WB */
 	if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
 		/* reserve PAT memory space to WC for VRAM */
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index d8a4fddab9c1..ef80ea0929fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -672,6 +672,7 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev,
 	/* add the xgmi offset of the physical node */
 	base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
 
+	amdgpu_gmc_set_agp_default(adev, mc);
 	amdgpu_gmc_vram_location(adev, &adev->gmc, base);
 	amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT);
 	if (!amdgpu_sriov_vf(adev))
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index 4713a62ad586..5f794a907945 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -637,6 +637,7 @@ static void gmc_v11_0_vram_gtt_location(struct amdgpu_device *adev,
 
 	base = adev->mmhub.funcs->get_fb_location(adev);
 
+	amdgpu_gmc_set_agp_default(adev, mc);
 	amdgpu_gmc_vram_location(adev, &adev->gmc, base);
 	amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_HIGH);
 	if (!amdgpu_sriov_vf(adev) ||
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index 7f66954fd302..42e103d7077d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -211,6 +211,7 @@ static void gmc_v6_0_vram_gtt_location(struct amdgpu_device *adev,
 
 	base <<= 24;
 
+	amdgpu_gmc_set_agp_default(adev, mc);
 	amdgpu_gmc_vram_location(adev, mc, base);
 	amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 61ca1a82b651..efc16e580f1e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -239,6 +239,7 @@ static void gmc_v7_0_vram_gtt_location(struct amdgpu_device *adev,
 
 	base <<= 24;
 
+	amdgpu_gmc_set_agp_default(adev, mc);
 	amdgpu_gmc_vram_location(adev, mc, base);
 	amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index fa59749c2aef..ff4ae73d27ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -413,6 +413,7 @@ static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev,
 		base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF;
 	base <<= 24;
 
+	amdgpu_gmc_set_agp_default(adev, mc);
 	amdgpu_gmc_vram_location(adev, mc, base);
 	amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index b66c5f7e1c56..fe52d132b629 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1614,6 +1614,8 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
 {
 	u64 base = adev->mmhub.funcs->get_fb_location(adev);
 
+	amdgpu_gmc_set_agp_default(adev, mc);
+
 	/* add the xgmi offset of the physical node */
 	base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
 	if (adev->gmc.xgmi.connected_to_cpu) {
-- 
2.41.0


[Index of Archives]     [Linux DRI Users]     [Linux Intel Graphics]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [XFree86]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux