RE: [PATCH V2 6/7] drm/amdgpu: define amdgpu_ras_late_init to call all ras blocks' .ras_late_init

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



[AMD Official Use Only]

With the concern in my inline comment below fixed, the series is:

Reviewed-by: Tao Zhou <tao.zhou1@xxxxxxx>

> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of yipechai
> Sent: Wednesday, February 16, 2022 4:08 PM
> To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx
> Cc: Zhou1, Tao <Tao.Zhou1@xxxxxxx>; Zhang, Hawking
> <Hawking.Zhang@xxxxxxx>; Clements, John <John.Clements@xxxxxxx>;
> Chai, Thomas <YiPeng.Chai@xxxxxxx>; Chai, Thomas <YiPeng.Chai@xxxxxxx>
> Subject: [PATCH V2 6/7] drm/amdgpu: define amdgpu_ras_late_init to call all ras
> blocks' .ras_late_init
> 
> Define amdgpu_ras_late_init to call all ras blocks' .ras_late_init.
> 
> Signed-off-by: yipechai <YiPeng.Chai@xxxxxxx>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  6 +++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c    | 44 ----------------------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c    | 25 ++++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h    |  1 +
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c      |  6 ---
>  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c     |  5 +--
>  drivers/gpu/drm/amd/amdgpu/soc15.c         |  6 +--
>  7 files changed, 34 insertions(+), 59 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index a74a1b74a172..d90388dd5362 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2625,6 +2625,12 @@ static int amdgpu_device_ip_late_init(struct
> amdgpu_device *adev)
>  		adev->ip_blocks[i].status.late_initialized = true;
>  	}
> 
> +	r = amdgpu_ras_late_init(adev);
> +	if (r) {
> +		DRM_ERROR("amdgpu_ras_late_init failed %d", r);
[Tao]: We already print an error message (dev_err) in amdgpu_ras_late_init when a block's ras_late_init fails, so I think the print here is unnecessary.

> +		return r;
> +	}
> +
>  	amdgpu_ras_set_error_query_ready(adev, true);
> 
>  	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE); diff --git
> a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> index ebf4194b0699..49dd81c0db2d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> @@ -449,50 +449,6 @@ int amdgpu_gmc_ras_early_init(struct amdgpu_device
> *adev)
> 
>  int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)  {
> -	int r;
> -
> -	if (adev->umc.ras && adev->umc.ras->ras_block.ras_late_init) {
> -		r = adev->umc.ras->ras_block.ras_late_init(adev, adev-
> >umc.ras_if);
> -		if (r)
> -			return r;
> -	}
> -
> -	if (adev->mmhub.ras && adev->mmhub.ras->ras_block.ras_late_init) {
> -		r = adev->mmhub.ras->ras_block.ras_late_init(adev, adev-
> >mmhub.ras_if);
> -		if (r)
> -			return r;
> -	}
> -
> -	if (adev->gmc.xgmi.ras && adev->gmc.xgmi.ras->ras_block.ras_late_init)
> {
> -		r = adev->gmc.xgmi.ras->ras_block.ras_late_init(adev, adev-
> >gmc.xgmi.ras_if);
> -		if (r)
> -			return r;
> -	}
> -
> -	if (adev->hdp.ras && adev->hdp.ras->ras_block.ras_late_init) {
> -		r = adev->hdp.ras->ras_block.ras_late_init(adev, adev-
> >hdp.ras_if);
> -		if (r)
> -			return r;
> -	}
> -
> -	if (adev->mca.mp0.ras && adev->mca.mp0.ras->ras_block.ras_late_init)
> {
> -		r = adev->mca.mp0.ras->ras_block.ras_late_init(adev, adev-
> >mca.mp0.ras_if);
> -		if (r)
> -			return r;
> -	}
> -
> -	if (adev->mca.mp1.ras && adev->mca.mp1.ras->ras_block.ras_late_init)
> {
> -		r = adev->mca.mp1.ras->ras_block.ras_late_init(adev, adev-
> >mca.mp1.ras_if);
> -		if (r)
> -			return r;
> -	}
> -
> -	if (adev->mca.mpio.ras && adev->mca.mpio.ras-
> >ras_block.ras_late_init) {
> -		r = adev->mca.mpio.ras->ras_block.ras_late_init(adev, adev-
> >mca.mpio.ras_if);
> -		if (r)
> -			return r;
> -	}
> -
>  	return 0;
>  }
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index 1aff88fcea76..b5286a0d9c8a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -2522,6 +2522,31 @@ void amdgpu_ras_suspend(struct amdgpu_device
> *adev)
>  		amdgpu_ras_disable_all_features(adev, 1);  }
> 
> +int amdgpu_ras_late_init(struct amdgpu_device *adev) {
> +	struct amdgpu_ras_block_list *node, *tmp;
> +	struct amdgpu_ras_block_object *obj;
> +	int r;
> +
> +	list_for_each_entry_safe(node, tmp, &adev->ras_list, node) {
> +		if (!node->ras_obj) {
> +			dev_warn(adev->dev, "Warning: abnormal ras list
> node.\n");
> +			continue;
> +		}
> +		obj = node->ras_obj;
> +		if (obj->ras_late_init) {
> +			r = obj->ras_late_init(adev, &obj->ras_comm);
> +			if (r) {
> +				dev_err(adev->dev, "%s failed to execute
> ras_late_init! ret:%d\n",
> +					obj->ras_comm.name, r);
> +				return r;
> +			}
> +		}
> +	}
> +
> +	return 0;
> +}
> +
>  /* do some fini work before IP fini as dependence */  int
> amdgpu_ras_pre_fini(struct amdgpu_device *adev)  { diff --git
> a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> index 837d1b79a9cb..143a83043d7c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> @@ -595,6 +595,7 @@ amdgpu_ras_error_to_ta(enum
> amdgpu_ras_error_type error) {
> 
>  /* called in ip_init and ip_fini */
>  int amdgpu_ras_init(struct amdgpu_device *adev);
> +int amdgpu_ras_late_init(struct amdgpu_device *adev);
>  int amdgpu_ras_fini(struct amdgpu_device *adev);  int
> amdgpu_ras_pre_fini(struct amdgpu_device *adev);
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index bb40ab83fc22..1997f129db9c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -4791,12 +4791,6 @@ static int gfx_v9_0_ecc_late_init(void *handle)
>  	if (r)
>  		return r;
> 
> -	if (adev->gfx.ras && adev->gfx.ras->ras_block.ras_late_init) {
> -		r = adev->gfx.ras->ras_block.ras_late_init(adev, adev-
> >gfx.ras_if);
> -		if (r)
> -			return r;
> -	}
> -
>  	if (adev->gfx.ras &&
>  	    adev->gfx.ras->enable_watchdog_timer)
>  		adev->gfx.ras->enable_watchdog_timer(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> index af5a1c93861b..e26c39fcd336 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> @@ -1894,10 +1894,7 @@ static int sdma_v4_0_late_init(void *handle)
>  			adev->sdma.ras->ras_block.hw_ops-
> >reset_ras_error_count(adev);
>  	}
> 
> -	if (adev->sdma.ras && adev->sdma.ras->ras_block.ras_late_init)
> -		return adev->sdma.ras->ras_block.ras_late_init(adev, adev-
> >sdma.ras_if);
> -	else
> -		return 0;
> +	return 0;
>  }
> 
>  static int sdma_v4_0_sw_init(void *handle) diff --git
> a/drivers/gpu/drm/amd/amdgpu/soc15.c
> b/drivers/gpu/drm/amd/amdgpu/soc15.c
> index 464d635a0487..ba983398c9d3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c
> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
> @@ -1186,15 +1186,11 @@ static int soc15_common_early_init(void *handle)
> static int soc15_common_late_init(void *handle)  {
>  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> -	int r = 0;
> 
>  	if (amdgpu_sriov_vf(adev))
>  		xgpu_ai_mailbox_get_irq(adev);
> 
> -	if (adev->nbio.ras && adev->nbio.ras->ras_block.ras_late_init)
> -		r = adev->nbio.ras->ras_block.ras_late_init(adev, adev-
> >nbio.ras_if);
> -
> -	return r;
> +	return 0;
>  }
> 
>  static int soc15_common_sw_init(void *handle)
> --
> 2.25.1




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux