Currently we pre-allocate entities and fences for all the HW IPs at
context creation, even though some of them may never be used. This
patch avoids that waste of entities and fences by creating the
entities for a HW IP only when they are first required.

Signed-off-by: Nirmoy Das <nirmoy.das@xxxxxxx>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 163 +++++++++++++-----------
 1 file changed, 92 insertions(+), 71 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 91638a2a5163..43f1266b1b2e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -58,64 +58,37 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp,
 	return -EACCES;
 }
 
-static int amdgpu_ctx_init(struct amdgpu_device *adev,
-			   enum drm_sched_priority priority,
-			   struct drm_file *filp,
-			   struct amdgpu_ctx *ctx)
+static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip)
 {
-	unsigned i, j;
-	int r;
+	struct amdgpu_device *adev = ctx->adev;
+	struct drm_gpu_scheduler **scheds;
+	struct drm_gpu_scheduler *sched;
+	unsigned num_scheds = 0;
+	enum drm_sched_priority priority;
+	int j, r;
 
-	if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
-		return -EINVAL;
+	ctx->entities[hw_ip] = kcalloc(amdgpu_ctx_num_entities[hw_ip],
+				       sizeof(struct amdgpu_ctx_entity), GFP_KERNEL);
 
-	r = amdgpu_ctx_priority_permit(filp, priority);
-	if (r)
-		return r;
+	if (!ctx->entities[hw_ip])
+		return -ENOMEM;
 
-	memset(ctx, 0, sizeof(*ctx));
-	ctx->adev = adev;
+	for (j = 0; j < amdgpu_ctx_num_entities[hw_ip]; ++j) {
 
-	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
-		ctx->entities[i] = kcalloc(amdgpu_ctx_num_entities[i],
-					   sizeof(struct amdgpu_ctx_entity),
-					   GFP_KERNEL);
+		struct amdgpu_ctx_entity *entity = &ctx->entities[hw_ip][j];
 
-		if (!ctx->entities[0]) {
-			r = -ENOMEM;
-			goto error_cleanup_entity_memory;
+		entity->sequence = 1;
+		entity->fences = kcalloc(amdgpu_sched_jobs,
+					 sizeof(struct dma_fence*), GFP_KERNEL);
+		if (!entity->fences) {
+			r = -ENOMEM;
+			goto error_cleanup_memory;
 		}
 	}
-	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
-		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
-
-			struct amdgpu_ctx_entity *entity = &ctx->entities[i][j];
-
-			entity->sequence = 1;
-			entity->fences = kcalloc(amdgpu_sched_jobs,
-						 sizeof(struct dma_fence*), GFP_KERNEL);
-			if (!entity->fences) {
-				r = -ENOMEM;
-				goto error_cleanup_memory;
-			}
-		}
 
-	kref_init(&ctx->refcount);
-	spin_lock_init(&ctx->ring_lock);
-	mutex_init(&ctx->lock);
-
-	ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
-	ctx->reset_counter_query = ctx->reset_counter;
-	ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
-	ctx->init_priority = priority;
-	ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
-
-	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
-		struct drm_gpu_scheduler **scheds;
-		struct drm_gpu_scheduler *sched;
-		unsigned num_scheds = 0;
-
-		switch (i) {
+	priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
+				ctx->init_priority : ctx->override_priority;
+	switch (hw_ip) {
 	case AMDGPU_HW_IP_GFX:
 		sched = &adev->gfx.gfx_ring[0].sched;
 		scheds = &sched;
@@ -156,12 +129,12 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 		scheds = adev->jpeg.jpeg_sched;
 		num_scheds = adev->jpeg.num_jpeg_sched;
 		break;
-		}
+	}
 
-		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
-			r = drm_sched_entity_init(&ctx->entities[i][j].entity,
-						  priority, scheds,
-						  num_scheds, &ctx->guilty);
+	for (j = 0; j < amdgpu_ctx_num_entities[hw_ip]; ++j) {
+		r = drm_sched_entity_init(&ctx->entities[hw_ip][j].entity,
+					  priority, scheds,
+					  num_scheds, &ctx->guilty);
 		if (r)
 			goto error_cleanup_entities;
 	}
@@ -169,28 +142,54 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 	return 0;
 
 error_cleanup_entities:
-	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
-		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
-			drm_sched_entity_destroy(&ctx->entities[i][j].entity);
+	for (j = 0; j < amdgpu_ctx_num_entities[hw_ip]; ++j)
+		drm_sched_entity_destroy(&ctx->entities[hw_ip][j].entity);
 
 error_cleanup_memory:
-	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
-		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
-			struct amdgpu_ctx_entity *entity = &ctx->entities[i][j];
+	for (j = 0; j < amdgpu_ctx_num_entities[hw_ip]; ++j) {
+		struct amdgpu_ctx_entity *entity = &ctx->entities[hw_ip][j];
 
-			kfree(entity->fences);
-			entity->fences = NULL;
-		}
-
-error_cleanup_entity_memory:
-	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
-		kfree(ctx->entities[i]);
-		ctx->entities[i] = NULL;
+		kfree(entity->fences);
+		entity->fences = NULL;
 	}
 
+	kfree(ctx->entities[hw_ip]);
+	ctx->entities[hw_ip] = NULL;
+
 	return r;
 }
 
+static int amdgpu_ctx_init(struct amdgpu_device *adev,
+			   enum drm_sched_priority priority,
+			   struct drm_file *filp,
+			   struct amdgpu_ctx *ctx)
+{
+	int r;
+
+	if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
+		return -EINVAL;
+
+	r = amdgpu_ctx_priority_permit(filp, priority);
+	if (r)
+		return r;
+
+	memset(ctx, 0, sizeof(*ctx));
+	ctx->adev = adev;
+
+	kref_init(&ctx->refcount);
+	spin_lock_init(&ctx->ring_lock);
+	mutex_init(&ctx->lock);
+
+	ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
+	ctx->reset_counter_query = ctx->reset_counter;
+	ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
+	ctx->init_priority = priority;
+	ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
+
+	return 0;
+
+}
+
 static void amdgpu_ctx_fini(struct kref *ref)
 {
 	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
@@ -201,10 +200,18 @@ static void amdgpu_ctx_fini(struct kref *ref)
 		return;
 
 	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
 		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
-			struct amdgpu_ctx_entity *entity = &ctx->entities[i][j];
+			struct amdgpu_ctx_entity *entity;
+
+			if (!ctx->entities[i])
+				continue;
+
+			entity = &ctx->entities[i][j];
+			if (!entity->fences)
+				continue;
 
-			for (k = 0; k < amdgpu_sched_jobs; ++k)
+			for (k = 0; k < amdgpu_sched_jobs; ++k) {
 				dma_fence_put(entity->fences[k]);
+			}
 
 			kfree(entity->fences);
 		}
@@ -237,6 +244,11 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
 		return -EINVAL;
 	}
 
+	if (ctx->entities[hw_ip] == NULL) {
+		amdgpu_ctx_init_entity(ctx, hw_ip);
+	}
+
+
 	*entity = &ctx->entities[hw_ip][ring].entity;
 	return 0;
 }
@@ -281,8 +293,11 @@ static void amdgpu_ctx_do_release(struct kref *ref)
 	ctx = container_of(ref, struct amdgpu_ctx, refcount);
 
 	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
-		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
+		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+			if (!ctx->entities[i])
+				continue;
 			drm_sched_entity_destroy(&ctx->entities[i][j].entity);
+		}
 
 	amdgpu_ctx_fini(ref);
 }
@@ -573,6 +588,9 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
 		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
 			struct drm_sched_entity *entity;
 
+			if (!ctx->entities[i])
+				continue;
+
 			entity = &ctx->entities[i][j].entity;
 			timeout = drm_sched_entity_flush(entity, timeout);
 		}
@@ -598,6 +616,9 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
 		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
 			struct drm_sched_entity *entity;
 
+			if (!ctx->entities[i])
+				continue;
+
 			entity = &ctx->entities[i][j].entity;
 			drm_sched_entity_fini(entity);
 		}
-- 
2.24.1
_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
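For readers skimming the diff, the shape of the change is a lazy-allocation pattern: amdgpu_ctx_get_entity() now calls amdgpu_ctx_init_entity() the first time an entity for a given HW IP is requested, and every other path checks ctx->entities[i] for NULL before touching it. The following is a minimal, self-contained userspace sketch of that pattern, not driver code; the names and sizes (ctx_get_entity, NUM_HW_IP, ENTITIES_PER_IP) are hypothetical and only mirror the structure of the patch.

/* Illustrative sketch of on-demand per-IP allocation; not amdgpu code. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NUM_HW_IP 8        /* hypothetical number of hardware IP types */
#define ENTITIES_PER_IP 4  /* hypothetical entities per IP */

struct entity { unsigned sequence; };

struct context {
	/* One lazily allocated array per hardware IP; NULL until first use. */
	struct entity *entities[NUM_HW_IP];
};

static int ctx_init_entities(struct context *ctx, unsigned hw_ip)
{
	ctx->entities[hw_ip] = calloc(ENTITIES_PER_IP, sizeof(struct entity));
	if (!ctx->entities[hw_ip])
		return -1;
	for (unsigned j = 0; j < ENTITIES_PER_IP; ++j)
		ctx->entities[hw_ip][j].sequence = 1;
	return 0;
}

/* Analogous to amdgpu_ctx_get_entity(): allocate only on first request. */
static struct entity *ctx_get_entity(struct context *ctx, unsigned hw_ip,
				     unsigned ring)
{
	if (hw_ip >= NUM_HW_IP || ring >= ENTITIES_PER_IP)
		return NULL;
	if (!ctx->entities[hw_ip] && ctx_init_entities(ctx, hw_ip))
		return NULL;
	return &ctx->entities[hw_ip][ring];
}

int main(void)
{
	struct context ctx;

	memset(&ctx, 0, sizeof(ctx));   /* context creation allocates nothing */
	ctx_get_entity(&ctx, 2, 0);     /* only IP 2 gets its entity array */

	for (unsigned i = 0; i < NUM_HW_IP; ++i)
		printf("IP %u: %s\n", i,
		       ctx.entities[i] ? "allocated" : "not allocated");

	for (unsigned i = 0; i < NUM_HW_IP; ++i)
		free(ctx.entities[i]);  /* free() tolerates NULL, like the NULL
					   checks in the teardown paths above */
	return 0;
}

The benefit the patch is after is exactly what the sketch shows: a context that never submits to, say, the VCN or JPEG queues never pays for those entity and fence arrays.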