Currently we pre-allocate entities for all the HW IPs on context
creation, and some of them might never be used. This patch tries to
resolve that entity wastage by creating entities for a HW IP only when
it is required.

Signed-off-by: Nirmoy Das <nirmoy.das@xxxxxxx>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 142 ++++++++++++++----------
 1 file changed, 81 insertions(+), 61 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index a0d3d7b756eb..0a390ebe873f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -74,7 +74,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 			   struct amdgpu_ctx *ctx)
 {
 	unsigned num_entities = amdgpu_ctx_total_num_entities();
-	unsigned i, j, k;
+	unsigned i;
 	int r;
 
 	if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
@@ -103,7 +103,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 	for (i = 0; i < num_entities; ++i) {
 		struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
 
-		entity->sequence = 1;
+		entity->sequence = -1;
 		entity->fences = &ctx->fences[amdgpu_sched_jobs * i];
 	}
 	for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
@@ -120,25 +120,59 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 	ctx->init_priority = priority;
 	ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
 
-	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
-		struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
-		struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
-		unsigned num_rings = 0;
-		unsigned num_rqs = 0;
+	return 0;
+
+error_free_fences:
+	kfree(ctx->fences);
+	ctx->fences = NULL;
+	return r;
+}
+
+static void amdgpu_ctx_fini(struct kref *ref)
+{
+	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
+	unsigned num_entities = amdgpu_ctx_total_num_entities();
+	struct amdgpu_device *adev = ctx->adev;
+	unsigned i, j;
+
+	if (!adev)
+		return;
+
+	for (i = 0; i < num_entities; ++i)
+		for (j = 0; j < amdgpu_sched_jobs; ++j)
+			dma_fence_put(ctx->entities[0][i].fences[j]);
+	kfree(ctx->fences);
+	kfree(ctx->entities[0]);
+
+	mutex_destroy(&ctx->lock);
+
+	kfree(ctx);
+}
 
-		switch (i) {
+
+int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip)
+{
+	struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
+	struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
+	struct amdgpu_device *adev = ctx->adev;
+	unsigned num_rings = 0;
+	unsigned num_rqs = 0;
+	unsigned i, j;
+	int r = 0;
+
+	switch (hw_ip) {
 		case AMDGPU_HW_IP_GFX:
 			rings[0] = &adev->gfx.gfx_ring[0];
 			num_rings = 1;
 			break;
 		case AMDGPU_HW_IP_COMPUTE:
-			for (j = 0; j < adev->gfx.num_compute_rings; ++j)
-				rings[j] = &adev->gfx.compute_ring[j];
+			for (i = 0; i < adev->gfx.num_compute_rings; ++i)
+				rings[i] = &adev->gfx.compute_ring[i];
 			num_rings = adev->gfx.num_compute_rings;
 			break;
 		case AMDGPU_HW_IP_DMA:
-			for (j = 0; j < adev->sdma.num_instances; ++j)
-				rings[j] = &adev->sdma.instance[j].ring;
+			for (i = 0; i < adev->sdma.num_instances; ++i)
+				rings[i] = &adev->sdma.instance[i].ring;
			num_rings = adev->sdma.num_instances;
 			break;
 		case AMDGPU_HW_IP_UVD:
@@ -154,80 +188,59 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 			num_rings = 1;
 			break;
 		case AMDGPU_HW_IP_VCN_DEC:
-			for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
-				if (adev->vcn.harvest_config & (1 << j))
+			for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+				if (adev->vcn.harvest_config & (1 << i))
 					continue;
-				rings[num_rings++] = &adev->vcn.inst[j].ring_dec;
+				rings[num_rings++] = &adev->vcn.inst[i].ring_dec;
 			}
 			break;
 		case AMDGPU_HW_IP_VCN_ENC:
-			for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
-				if (adev->vcn.harvest_config & (1 << j))
+			for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+				if (adev->vcn.harvest_config & (1 << i))
 					continue;
-				for (k = 0; k < adev->vcn.num_enc_rings; ++k)
-					rings[num_rings++] = &adev->vcn.inst[j].ring_enc[k];
+				for (j = 0; j < adev->vcn.num_enc_rings; ++j)
+					rings[num_rings++] = &adev->vcn.inst[i].ring_enc[j];
 			}
 			break;
 		case AMDGPU_HW_IP_VCN_JPEG:
-			for (j = 0; j < adev->jpeg.num_jpeg_inst; ++j) {
-				if (adev->vcn.harvest_config & (1 << j))
+			for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+				if (adev->vcn.harvest_config & (1 << i))
 					continue;
-				rings[num_rings++] = &adev->jpeg.inst[j].ring_dec;
+				rings[num_rings++] = &adev->jpeg.inst[i].ring_dec;
 			}
 			break;
-		}
-
-		for (j = 0; j < num_rings; ++j) {
-			if (!rings[j]->adev)
-				continue;
+	}
 
-			rqs[num_rqs++] = &rings[j]->sched.sched_rq[priority];
-		}
+	for (i = 0; i < num_rings; ++i) {
+		if (!rings[i]->adev)
+			continue;
 
-		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
-			r = drm_sched_entity_init(&ctx->entities[i][j].entity,
-						  rqs, num_rqs, &ctx->guilty);
-		if (r)
-			goto error_cleanup_entities;
+		rqs[num_rqs++] = &rings[i]->sched.sched_rq[ctx->init_priority];
 	}
 
+	for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i)
+		r = drm_sched_entity_init(&ctx->entities[hw_ip][i].entity,
+					  rqs, num_rqs, &ctx->guilty);
+	if (r)
+		goto error_cleanup_entities;
+
+	for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i)
+		ctx->entities[hw_ip][i].sequence = 1;
+
 	return 0;
 
 error_cleanup_entities:
-	for (i = 0; i < num_entities; ++i)
-		drm_sched_entity_destroy(&ctx->entities[0][i].entity);
-	kfree(ctx->entities[0]);
+	for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i)
+		drm_sched_entity_destroy(&ctx->entities[hw_ip][i].entity);
 
-error_free_fences:
-	kfree(ctx->fences);
-	ctx->fences = NULL;
 	return r;
 }
 
-static void amdgpu_ctx_fini(struct kref *ref)
-{
-	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
-	struct amdgpu_device *adev = ctx->adev;
-	unsigned i, j;
-
-	if (!adev)
-		return;
-
-	for (i = 0; i < num_entities; ++i)
-		for (j = 0; j < amdgpu_sched_jobs; ++j)
-			dma_fence_put(ctx->entities[0][i].fences[j]);
-	kfree(ctx->fences);
-	kfree(ctx->entities[0]);
-
-	mutex_destroy(&ctx->lock);
-
-	kfree(ctx);
-}
-
 int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
 			  u32 ring, struct drm_sched_entity **entity)
 {
+	int r;
+
 	if (hw_ip >= AMDGPU_HW_IP_NUM) {
 		DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
 		return -EINVAL;
@@ -244,6 +257,13 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
 		return -EINVAL;
 	}
 
+	if (ctx->entities[hw_ip][ring].sequence == -1) {
+		r = amdgpu_ctx_init_entity(ctx, hw_ip);
+
+		if (r)
+			return r;
+	}
+
 	*entity = &ctx->entities[hw_ip][ring].entity;
 	return 0;
 }
-- 
2.23.0
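For readers skimming the diff, the core of the change is an "initialize
on first use" pattern: entity->sequence doubles as an is-initialized
flag (-1, i.e. the maximum value of the unsigned counter, means "not
set up yet"), so the first amdgpu_ctx_get_entity() call for a HW IP is
what pays the drm_sched_entity_init() cost. The stand-alone sketch
below models just that pattern; the names in it (ctx_entity,
hw_entity_init, get_entity, SEQ_UNINITIALIZED) are illustrative
stand-ins, not amdgpu or DRM API:

/*
 * Minimal user-space model of the lazy entity allocation above.
 * All names here are made up for illustration; none of this is
 * amdgpu or DRM scheduler API.
 */
#include <stdint.h>
#include <stdio.h>

/* Like "entity->sequence = -1" in the patch: -1 stored in an
 * unsigned 64-bit counter converts to UINT64_MAX and serves as a
 * "never used" sentinel. */
#define SEQ_UNINITIALIZED ((uint64_t)-1)

struct ctx_entity {
	uint64_t sequence;	/* SEQ_UNINITIALIZED until first use */
	int hw_ip;		/* which HW IP this entity would serve */
};

/* Stand-in for the expensive drm_sched_entity_init() setup that we
 * only want to pay for HW IPs that are actually used. */
static int hw_entity_init(struct ctx_entity *e, int hw_ip)
{
	printf("initializing entity for HW IP %d\n", hw_ip);
	e->hw_ip = hw_ip;
	e->sequence = 1;	/* first valid sequence number */
	return 0;
}

/* Mirrors amdgpu_ctx_get_entity(): set the entity up on the first
 * lookup, hand it out cheaply on every later one. */
static int get_entity(struct ctx_entity *e, int hw_ip,
		      struct ctx_entity **out)
{
	if (e->sequence == SEQ_UNINITIALIZED) {
		int r = hw_entity_init(e, hw_ip);

		if (r)
			return r;
	}

	*out = e;
	return 0;
}

int main(void)
{
	struct ctx_entity entity = { .sequence = SEQ_UNINITIALIZED };
	struct ctx_entity *e;

	get_entity(&entity, 0, &e);	/* first call: pays init cost */
	get_entity(&entity, 0, &e);	/* second call: plain lookup */
	return 0;
}

Two things the sketch glosses over: the "sequence == -1" test relies
on the usual C conversion of -1 to the maximum unsigned value, and a
check-then-init sequence like this is not safe against concurrent
first lookups of the same HW IP; any real user of the pattern needs
either serialized callers or a lock around the sentinel test.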