RE: [PATCH 2/2] drm/amdgpu: Enable gfx v11_0_3 ras if poison mode is supported

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



[AMD Official Use Only - General]

The series is:

Reviewed-by: Tao Zhou <tao.zhou1@xxxxxxx>

> -----Original Message-----
> From: Zhang, Hawking <Hawking.Zhang@xxxxxxx>
> Sent: Sunday, June 11, 2023 6:46 PM
> To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx; Yang, Stanley <Stanley.Yang@xxxxxxx>; Li,
> Candice <Candice.Li@xxxxxxx>; Chai, Thomas <YiPeng.Chai@xxxxxxx>;
> Zhou1, Tao <Tao.Zhou1@xxxxxxx>
> Cc: Zhang, Hawking <Hawking.Zhang@xxxxxxx>
> Subject: [PATCH 2/2] drm/amdgpu: Enable gfx v11_0_3 ras if poison mode is
> supported
>
> GFX v11_0_3 ras needs to be enabled if poison mode is supported. Driver doesn't
> need issue an feature enable call in gfx_v11_0 late init phase. The ras late init call
> is already centralized to amdgpu_ras_late_init.
> In addition, move poison_mode check out of common helper like
> amdgpu_ras_is_supported and amdgpu_ras_is_feature_allowed ensure only GFX
> RAS is enabled when poison mode is supported.
>
> Signed-off-by: Hawking Zhang <Hawking.Zhang@xxxxxxx>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 49 ++++++++-----------------
> drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c  | 26 -------------
>  2 files changed, 16 insertions(+), 59 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index dd7cdc234d7e..35e70860d628 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -126,6 +126,7 @@ static bool amdgpu_ras_check_bad_page_unlock(struct
> amdgpu_ras *con,
>                               uint64_t addr);
>  static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
>                               uint64_t addr);
> +static void amdgpu_ras_query_poison_mode(struct amdgpu_device *adev);
>  #ifdef CONFIG_X86_MCE_AMD
>  static void amdgpu_register_bad_pages_mca_notifier(struct amdgpu_device
> *adev);  struct mce_notifier_adev_list { @@ -757,16 +758,6 @@ static int
> __amdgpu_ras_feature_enable(struct amdgpu_device *adev,
>       return 0;
>  }
>
> -static int amdgpu_ras_check_feature_allowed(struct amdgpu_device *adev,
> -             struct ras_common_if *head)
> -{
> -     if (amdgpu_ras_is_feature_allowed(adev, head) ||
> -             amdgpu_ras_is_poison_mode_supported(adev))
> -             return 1;
> -     else
> -             return 0;
> -}
> -
>  /* wrapper of psp_ras_enable_features */  int
> amdgpu_ras_feature_enable(struct amdgpu_device *adev,
>               struct ras_common_if *head, bool enable) @@ -797,7 +788,7
> @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
>       }
>
>       /* Do not enable if it is not allowed. */
> -     if (enable && !amdgpu_ras_check_feature_allowed(adev, head))
> +     if (enable && !amdgpu_ras_is_feature_allowed(adev, head))
>               goto out;
>
>       /* Only enable ras feature operation handle on host side */ @@ -2420,9
> +2411,9 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device
> *adev)  }
>
>  /*
> - * this is workaround for vega20 workstation sku,
> - * force enable gfx ras, ignore vbios gfx ras flag
> - * due to GC EDC can not write
> + * Common helpers for device or IP specific RAS quirks including
> + * a). Enable gfx ras on D16406 or D36002 board
> + * b). Enable gfx ras in gfx_v11_0_3 if poison mode is supported
>   */
>  static void amdgpu_ras_get_quirks(struct amdgpu_device *adev)  { @@ -
> 2431,10 +2422,16 @@ static void amdgpu_ras_get_quirks(struct amdgpu_device
> *adev)
>       if (!ctx)
>               return;
>
> +     /* Enable gfx ras on specific board */
>       if (strnstr(ctx->vbios_version, "D16406",
>                   sizeof(ctx->vbios_version)) ||
> -             strnstr(ctx->vbios_version, "D36002",
> -                     sizeof(ctx->vbios_version)))
> +         strnstr(ctx->vbios_version, "D36002",
> +                 sizeof(ctx->vbios_version)))
> +             adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX);
> +
> +     /* Enable gfx ras on gfx_v11_0_3 if poison mode is supported */
> +     if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3) &&
> +         amdgpu_ras_is_poison_mode_supported(adev))
>               adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX);  }
>
> @@ -2502,6 +2499,8 @@ static void amdgpu_ras_check_supported(struct
> amdgpu_device *adev)
>                                          1 <<
> AMDGPU_RAS_BLOCK__MMHUB);
>       }
>
> +     amdgpu_ras_query_poison_mode(adev);
> +
>       amdgpu_ras_get_quirks(adev);
>
>       /* hw_supported needs to be aligned with RAS block mask. */ @@ -
> 2659,8 +2658,6 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
>                       goto release_con;
>       }
>
> -     amdgpu_ras_query_poison_mode(adev);
> -
>       if (amdgpu_ras_fs_init(adev)) {
>               r = -EINVAL;
>               goto release_con;
> @@ -3115,26 +3112,12 @@ int amdgpu_ras_set_context(struct amdgpu_device
> *adev, struct amdgpu_ras *ras_co  int amdgpu_ras_is_supported(struct
> amdgpu_device *adev,
>               unsigned int block)
>  {
> -     int ret = 0;
>       struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
>
>       if (block >= AMDGPU_RAS_BLOCK_COUNT)
>               return 0;
>
> -     ret = ras && (adev->ras_enabled & (1 << block));
> -
> -     /* For the special asic with mem ecc enabled but sram ecc
> -      * not enabled, even if the ras block is not supported on
> -      * .ras_enabled, if the asic supports poison mode and the
> -      * ras block has ras configuration, it can be considered
> -      * that the ras block supports ras function.
> -      */
> -     if (!ret &&
> -         amdgpu_ras_is_poison_mode_supported(adev) &&
> -         amdgpu_ras_get_ras_block(adev, block, 0))
> -             ret = 1;
> -
> -     return ret;
> +     return (ras && (adev->ras_enabled & (1 << block)));
>  }
>
>  int amdgpu_ras_reset_gpu(struct amdgpu_device *adev) diff --git
> a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> index 690e121d9dda..11e0c574b9f7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> @@ -4650,26 +4650,6 @@ static int gfx_v11_0_early_init(void *handle)
>       return gfx_v11_0_init_microcode(adev);  }
>
> -static int gfx_v11_0_ras_late_init(void *handle) -{
> -     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> -     struct ras_common_if *gfx_common_if;
> -     int ret;
> -
> -     gfx_common_if = kzalloc(sizeof(struct ras_common_if), GFP_KERNEL);
> -     if (!gfx_common_if)
> -             return -ENOMEM;
> -
> -     gfx_common_if->block = AMDGPU_RAS_BLOCK__GFX;
> -
> -     ret = amdgpu_ras_feature_enable(adev, gfx_common_if, true);
> -     if (ret)
> -             dev_warn(adev->dev, "Failed to enable gfx11 ras feature\n");
> -
> -     kfree(gfx_common_if);
> -     return 0;
> -}
> -
>  static int gfx_v11_0_late_init(void *handle)  {
>       struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -
> 4683,12 +4663,6 @@ static int gfx_v11_0_late_init(void *handle)
>       if (r)
>               return r;
>
> -     if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3)) {
> -             r = gfx_v11_0_ras_late_init(handle);
> -             if (r)
> -                     return r;
> -     }
> -
>       return 0;
>  }
>
> --
> 2.17.1





[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux