[AMD Official Use Only - General] Reviewed-by: Zhigang Luo <zhigang.luo@xxxxxxx> -----Original Message----- From: Skvortsov, Victor <Victor.Skvortsov@xxxxxxx> Sent: Tuesday, March 12, 2024 1:51 PM To: Skvortsov, Victor <Victor.Skvortsov@xxxxxxx>; Luo, Zhigang <Zhigang.Luo@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx Cc: Koenig, Christian <Christian.Koenig@xxxxxxx> Subject: [PATCH] drm/amdgpu: Remove bo_create_kernel_at path from virt page Use amdgpu_vram_mgr to reserve bad page ranges. Reserved ranges will be freed by amdgpu_vram_mgr_fini(). Delete bo_create path as it is redundant. Suggested-by: Christian König <christian.koenig@xxxxxxx> Signed-off-by: Victor Skvortsov <victor.skvortsov@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 55 ++---------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 2 - 2 files changed, 3 insertions(+), 54 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 7a4eae36778a..2a20714b9c16 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -244,7 +244,6 @@ static int amdgpu_virt_init_ras_err_handler_data(struct amdgpu_device *adev) */ unsigned int align_space = 512; void *bps = NULL; - struct amdgpu_bo **bps_bo = NULL; *data = kmalloc(sizeof(struct amdgpu_virt_ras_err_handler_data), GFP_KERNEL); if (!*data) @@ -254,12 +253,7 @@ static int amdgpu_virt_init_ras_err_handler_data(struct amdgpu_device *adev) if (!bps) goto bps_failure; - bps_bo = kmalloc_array(align_space, sizeof(*(*data)->bps_bo), GFP_KERNEL); - if (!bps_bo) - goto bps_bo_failure; - (*data)->bps = bps; - (*data)->bps_bo = bps_bo; (*data)->count = 0; (*data)->last_reserved = 0; @@ -267,34 +261,12 @@ static int amdgpu_virt_init_ras_err_handler_data(struct amdgpu_device *adev) return 0; -bps_bo_failure: - kfree(bps); bps_failure: kfree(*data); data_failure: return -ENOMEM; } -static void amdgpu_virt_ras_release_bp(struct amdgpu_device *adev) -{ - struct
amdgpu_virt *virt = &adev->virt; - struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data; - struct amdgpu_bo *bo; - int i; - - if (!data) - return; - - for (i = data->last_reserved - 1; i >= 0; i--) { - bo = data->bps_bo[i]; - if (bo) { - amdgpu_bo_free_kernel(&bo, NULL, NULL); - data->bps_bo[i] = bo; - } - data->last_reserved = i; - } -} - void amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device *adev) { struct amdgpu_virt *virt = &adev->virt; @@ -305,10 +277,7 @@ void amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device *adev) if (!data) return; - amdgpu_virt_ras_release_bp(adev); - kfree(data->bps); - kfree(data->bps_bo); kfree(data); virt->virt_eh_data = NULL; } @@ -330,9 +299,6 @@ static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev) { struct amdgpu_virt *virt = &adev->virt; struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data; - struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr; - struct ttm_resource_manager *man = &mgr->manager; - struct amdgpu_bo *bo = NULL; uint64_t bp; int i; @@ -341,26 +307,11 @@ static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev) for (i = data->last_reserved; i < data->count; i++) { bp = data->bps[i].retired_page; + if (amdgpu_vram_mgr_reserve_range(&adev->mman.vram_mgr, + bp << AMDGPU_GPU_PAGE_SHIFT, AMDGPU_GPU_PAGE_SIZE)) + DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", +bp); - /* There are two cases of reserve error should be ignored: - * 1) a ras bad page has been allocated (used by someone); - * 2) a ras bad page has been reserved (duplicate error injection - * for one page); - */ - if (ttm_resource_manager_used(man)) { - amdgpu_vram_mgr_reserve_range(&adev->mman.vram_mgr, - bp << AMDGPU_GPU_PAGE_SHIFT, - AMDGPU_GPU_PAGE_SIZE); - data->bps_bo[i] = NULL; - } else { - if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT, - AMDGPU_GPU_PAGE_SIZE, - &bo, NULL)) - DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp); 
- data->bps_bo[i] = bo; - } data->last_reserved = i + 1; - bo = NULL; } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 3f59b7b5523f..15599951e7b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -224,8 +224,6 @@ struct amdgim_vf2pf_info_v2 { struct amdgpu_virt_ras_err_handler_data { /* point to bad page records array */ struct eeprom_table_record *bps; - /* point to reserved bo array */ - struct amdgpu_bo **bps_bo; /* the count of entries */ int count; /* last reserved entry's index + 1 */ -- 2.25.1