From: Le Ma <le.ma@xxxxxxx> Update cwsr area size for gfx950 to fit the new user queue buffer validation. The size of LDS calculation is referred from gfx950 thunk implementation. Signed-off-by: Le Ma <le.ma@xxxxxxx> Acked-by: Hawking Zhang <Hawking.Zhang@xxxxxxx> Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx> --- drivers/gpu/drm/amd/amdkfd/kfd_queue.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index ad29634f8b44c..ecccd7adbab4d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -394,7 +394,8 @@ static u32 kfd_get_vgpr_size_per_cu(u32 gfxv) if ((gfxv / 100 * 100) == 90400 || /* GFX_VERSION_AQUA_VANJARAM */ gfxv == 90010 || /* GFX_VERSION_ALDEBARAN */ - gfxv == 90008) /* GFX_VERSION_ARCTURUS */ + gfxv == 90008 || /* GFX_VERSION_ARCTURUS */ + gfxv == 90500) vgpr_size = 0x80000; else if (gfxv == 110000 || /* GFX_VERSION_PLUM_BONITO */ gfxv == 110001 || /* GFX_VERSION_WHEAT_NAS */ @@ -405,9 +406,10 @@ static u32 kfd_get_vgpr_size_per_cu(u32 gfxv) return vgpr_size; } -#define WG_CONTEXT_DATA_SIZE_PER_CU(gfxv) \ +#define WG_CONTEXT_DATA_SIZE_PER_CU(gfxv, props) \ (kfd_get_vgpr_size_per_cu(gfxv) + SGPR_SIZE_PER_CU +\ - LDS_SIZE_PER_CU + HWREG_SIZE_PER_CU) + (((gfxv) == 90500) ? (props->lds_size_in_kb << 10) : LDS_SIZE_PER_CU) +\ + HWREG_SIZE_PER_CU) #define CNTL_STACK_BYTES_PER_WAVE(gfxv) \ ((gfxv) >= 100100 ? 12 : 8) /* GFX_VERSION_NAVI10*/ @@ -431,7 +433,7 @@ void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev) min(cu_num * 40, props->array_count / props->simd_arrays_per_engine * 512) : cu_num * 32; - wg_data_size = ALIGN(cu_num * WG_CONTEXT_DATA_SIZE_PER_CU(gfxv), PAGE_SIZE); + wg_data_size = ALIGN(cu_num * WG_CONTEXT_DATA_SIZE_PER_CU(gfxv, props), PAGE_SIZE); ctl_stack_size = wave_num * CNTL_STACK_BYTES_PER_WAVE(gfxv) + 8; ctl_stack_size = ALIGN(SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER + ctl_stack_size, PAGE_SIZE); -- 2.47.0