implement ras_create_bad_pages_bo to simplify ras code Signed-off-by: Tao Zhou <tao.zhou1@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 72 +++++++++++-------------- 1 file changed, 31 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index d1bafa92ca91..fe3a57e567c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1430,41 +1430,53 @@ static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev) return ret; } -/* called in gpu recovery/init */ -int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev) +static void amdgpu_ras_create_bad_pages_bo(struct amdgpu_device *adev) { + /* Note: the caller should guarantee con and data are not NULL */ struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - struct ras_err_handler_data *data; + struct ras_err_handler_data *data = con->eh_data; uint64_t bp; - struct amdgpu_bo *bo = NULL; - int i, ret = 0; - - if (!con || !con->eh_data) - return 0; + struct amdgpu_bo *bo; + int i; - mutex_lock(&con->recovery_lock); - data = con->eh_data; - if (!data) - goto out; - /* reserve vram at driver post stage. */ for (i = data->last_reserved; i < data->count; i++) { + bo = NULL; bp = data->bps[i].retired_page; - /* There are two cases of reserve error should be ignored: + /* There are three cases of reserve error should be ignored: * 1) a ras bad page has been allocated (used by someone); * 2) a ras bad page has been reserved (duplicate error injection * for one page); + * 3) bo info isn't lost in gpu reset */ if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT, AMDGPU_GPU_PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, &bo, NULL)) - DRM_WARN("RAS WARN: reserve vram for retired page %llx fail\n", bp); - - data->bps_bo[i] = bo; + DRM_NOTE("RAS NOTE: reserve vram for retired page 0x%llx fail\n", bp); + else + data->bps_bo[i] = bo; data->last_reserved = i + 1; - bo = NULL; } +} + +/* called in gpu recovery/init */ +int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_err_handler_data *data; + int ret = 0; + + if (!con || !con->eh_data) + return 0; + + mutex_lock(&con->recovery_lock); + data = con->eh_data; + if (!data) + goto out; + + /* reserve vram at driver post stage. */ + amdgpu_ras_create_bad_pages_bo(adev); /* continue to save bad pages to eeprom even reesrve_vram fails */ ret = amdgpu_ras_save_bad_pages(adev); @@ -1583,9 +1595,6 @@ void amdgpu_ras_recovery_reset(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_err_handler_data *data; - uint64_t bp; - struct amdgpu_bo *bo = NULL; - int i; if (!con || !con->eh_data) return; @@ -1600,26 +1609,7 @@ void amdgpu_ras_recovery_reset(struct amdgpu_device *adev) data = con->eh_data; /* force to reserve all retired page again */ data->last_reserved = 0; - - for (i = data->last_reserved; i < data->count; i++) { - bp = data->bps[i].retired_page; - - /* There are three cases of reserve error should be ignored: - * 1) a ras bad page has been allocated (used by someone); - * 2) a ras bad page has been reserved (duplicate error injection - * for one page); - * 3) bo info isn't lost in gpu reset - */ - if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT, - AMDGPU_GPU_PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &bo, NULL)) - DRM_NOTE("RAS NOTE: recreate bo for retired page 0x%llx fail\n", bp); - else - data->bps_bo[i] = bo; - data->last_reserved = i + 1; - bo = NULL; - } + amdgpu_ras_create_bad_pages_bo(adev); } /* recovery end */ -- 2.17.1 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx