GFX v11_0_3 ras needs to be enabled if poison mode is supported. Driver doesn't need issue an feature enable call in gfx_v11_0 late init phase. The ras late init call is already centralized to amdgpu_ras_late_init. In addition, move poison_mode check out of common helper like amdgpu_ras_is_supported and amdgpu_ras_is_feature_allowed ensure only GFX RAS is enabled when poison mode is supported. Signed-off-by: Hawking Zhang <Hawking.Zhang@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 49 ++++++++----------------- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 26 ------------- 2 files changed, 16 insertions(+), 59 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index dd7cdc234d7e..35e70860d628 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -126,6 +126,7 @@ static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con, uint64_t addr); static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev, uint64_t addr); +static void amdgpu_ras_query_poison_mode(struct amdgpu_device *adev); #ifdef CONFIG_X86_MCE_AMD static void amdgpu_register_bad_pages_mca_notifier(struct amdgpu_device *adev); struct mce_notifier_adev_list { @@ -757,16 +758,6 @@ static int __amdgpu_ras_feature_enable(struct amdgpu_device *adev, return 0; } -static int amdgpu_ras_check_feature_allowed(struct amdgpu_device *adev, - struct ras_common_if *head) -{ - if (amdgpu_ras_is_feature_allowed(adev, head) || - amdgpu_ras_is_poison_mode_supported(adev)) - return 1; - else - return 0; -} - /* wrapper of psp_ras_enable_features */ int amdgpu_ras_feature_enable(struct amdgpu_device *adev, struct ras_common_if *head, bool enable) @@ -797,7 +788,7 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev, } /* Do not enable if it is not allowed. */ - if (enable && !amdgpu_ras_check_feature_allowed(adev, head)) + if (enable && !amdgpu_ras_is_feature_allowed(adev, head)) goto out; /* Only enable ras feature operation handle on host side */ @@ -2420,9 +2411,9 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev) } /* - * this is workaround for vega20 workstation sku, - * force enable gfx ras, ignore vbios gfx ras flag - * due to GC EDC can not write + * Common helpers for device or IP specific RAS quirks including + * a). Enable gfx ras on D16406 or D36002 board + * b). Enable gfx ras in gfx_v11_0_3 if poison mode is supported */ static void amdgpu_ras_get_quirks(struct amdgpu_device *adev) { @@ -2431,10 +2422,16 @@ static void amdgpu_ras_get_quirks(struct amdgpu_device *adev) if (!ctx) return; + /* Enable gfx ras on specific board */ if (strnstr(ctx->vbios_version, "D16406", sizeof(ctx->vbios_version)) || - strnstr(ctx->vbios_version, "D36002", - sizeof(ctx->vbios_version))) + strnstr(ctx->vbios_version, "D36002", + sizeof(ctx->vbios_version))) + adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX); + + /* Enable gfx ras on gfx_v11_0_3 if poison mode is supported */ + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3) && + amdgpu_ras_is_poison_mode_supported(adev)) adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX); } @@ -2502,6 +2499,8 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev) 1 << AMDGPU_RAS_BLOCK__MMHUB); } + amdgpu_ras_query_poison_mode(adev); + amdgpu_ras_get_quirks(adev); /* hw_supported needs to be aligned with RAS block mask. */ @@ -2659,8 +2658,6 @@ int amdgpu_ras_init(struct amdgpu_device *adev) goto release_con; } - amdgpu_ras_query_poison_mode(adev); - if (amdgpu_ras_fs_init(adev)) { r = -EINVAL; goto release_con; @@ -3115,26 +3112,12 @@ int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras *ras_co int amdgpu_ras_is_supported(struct amdgpu_device *adev, unsigned int block) { - int ret = 0; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); if (block >= AMDGPU_RAS_BLOCK_COUNT) return 0; - ret = ras && (adev->ras_enabled & (1 << block)); - - /* For the special asic with mem ecc enabled but sram ecc - * not enabled, even if the ras block is not supported on - * .ras_enabled, if the asic supports poison mode and the - * ras block has ras configuration, it can be considered - * that the ras block supports ras function. - */ - if (!ret && - amdgpu_ras_is_poison_mode_supported(adev) && - amdgpu_ras_get_ras_block(adev, block, 0)) - ret = 1; - - return ret; + return (ras && (adev->ras_enabled & (1 << block))); } int amdgpu_ras_reset_gpu(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 690e121d9dda..11e0c574b9f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4650,26 +4650,6 @@ static int gfx_v11_0_early_init(void *handle) return gfx_v11_0_init_microcode(adev); } -static int gfx_v11_0_ras_late_init(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct ras_common_if *gfx_common_if; - int ret; - - gfx_common_if = kzalloc(sizeof(struct ras_common_if), GFP_KERNEL); - if (!gfx_common_if) - return -ENOMEM; - - gfx_common_if->block = AMDGPU_RAS_BLOCK__GFX; - - ret = amdgpu_ras_feature_enable(adev, gfx_common_if, true); - if (ret) - dev_warn(adev->dev, "Failed to enable gfx11 ras feature\n"); - - kfree(gfx_common_if); - return 0; -} - static int gfx_v11_0_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -4683,12 +4663,6 @@ static int gfx_v11_0_late_init(void *handle) if (r) return r; - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3)) { - r = gfx_v11_0_ras_late_init(handle); - if (r) - return r; - } - return 0; } -- 2.17.1