Re: [PATCH V2 1/2] drm/amdgpu: Move xgmi ras initialization from .late_init to .early_init

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 





On 1/20/2022 8:48 AM, yipechai wrote:
Move xgmi ras initialization from .late_init to .early_init, which let
xgmi ras can be initialized only once.

Signed-off-by: yipechai <YiPeng.Chai@xxxxxxx>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 15 ++++++++++-----
  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h |  1 +
  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  |  5 +++++
  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   |  5 +++++
  4 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 3483a82f5734..788c0257832d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -436,6 +436,16 @@ void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,
  	} while (fault->timestamp < tmp);
  }
+int amdgpu_gmc_ras_early_init(struct amdgpu_device *adev)
+{
+	if (!adev->gmc.xgmi.connected_to_cpu) {
+		adev->gmc.xgmi.ras = &xgmi_ras;
+		amdgpu_ras_register_ras_block(adev, &adev->gmc.xgmi.ras->ras_block);
+	}
+
+	return 0;
+}
+
  int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
  {
  	int r;
@@ -452,11 +462,6 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
  			return r;
  	}
- if (!adev->gmc.xgmi.connected_to_cpu) {
-		adev->gmc.xgmi.ras = &xgmi_ras;
-		amdgpu_ras_register_ras_block(adev, &adev->gmc.xgmi.ras->ras_block);
-	}
-
  	if (adev->gmc.xgmi.ras && adev->gmc.xgmi.ras->ras_block.ras_late_init) {
  		r = adev->gmc.xgmi.ras->ras_block.ras_late_init(adev, NULL);
  		if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 0001631cfedb..ac4c0e50b45c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -318,6 +318,7 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev,
  			      uint16_t pasid, uint64_t timestamp);
  void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,
  				     uint16_t pasid);
+int amdgpu_gmc_ras_early_init(struct amdgpu_device *adev);
  int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev);
  void amdgpu_gmc_ras_fini(struct amdgpu_device *adev);
  int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 4f8d356f8432..7a6ad5d467b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -719,6 +719,7 @@ static void gmc_v10_0_set_gfxhub_funcs(struct amdgpu_device *adev)
static int gmc_v10_0_early_init(void *handle)
  {
+	int r;
  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
gmc_v10_0_set_mmhub_funcs(adev);
@@ -734,6 +735,10 @@ static int gmc_v10_0_early_init(void *handle)
  	adev->gmc.private_aperture_end =
  		adev->gmc.private_aperture_start + (4ULL << 30) - 1;
+ r = amdgpu_gmc_ras_early_init(adev);
+	if (r)
+		return r;
+

At this point it's unknown if RAS is applicable for the SKU. I think this failure check shouldn't be there (here and below one).

amdgpu_gmc_ras_early_init is return 0 always, that way also this check is not needed.

Thanks,
Lijo

  	return 0;
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index c76ffd1a70cd..3cdd3d459d51 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1318,6 +1318,7 @@ static void gmc_v9_0_set_mca_funcs(struct amdgpu_device *adev)
static int gmc_v9_0_early_init(void *handle)
  {
+	int r;
  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* ARCT and VEGA20 don't have XGMI defined in their IP discovery tables */
@@ -1347,6 +1348,10 @@ static int gmc_v9_0_early_init(void *handle)
  	adev->gmc.private_aperture_end =
  		adev->gmc.private_aperture_start + (4ULL << 30) - 1;
+ r = amdgpu_gmc_ras_early_init(adev);
+	if (r)
+		return r;
+
  	return 0;
  }



[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux