Re: [RFC PATCH] drm/amdgpu: allocate entities on demand

It looks like a start, but there are numerous things which need to be fixed.

Question number one is: What's that good for? Entities are not the problem here. The real issue is the fence ring and the rq_list.

The rq_list could actually be made constant since it should never be changed by the entity. It is only changed for backward compatibility in drm_sched_entity_set_priority().

So I would start there and clean up drm_sched_entity_set_priority() to actually just set a new constant rq list instead.
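
Something like this (completely untested, just to illustrate the idea; the rq_list, num_rq_list and rq_lock fields and the rq add/remove helpers are the existing scheduler ones, the function name is made up):

void drm_sched_entity_set_rq_list(struct drm_sched_entity *entity,
                                  struct drm_sched_rq **rq_list,
                                  unsigned int num_rq_list)
{
        spin_lock(&entity->rq_lock);

        /* The list is caller-owned and constant, so we only exchange
         * the pointer instead of patching each entry in place.
         */
        entity->rq_list = rq_list;
        entity->num_rq_list = num_rq_list;

        /* If the entity is currently queued, move it over to a rq
         * from the new list so that pending jobs keep scheduling.
         */
        if (entity->rq) {
                drm_sched_rq_remove_entity(entity->rq, entity);
                entity->rq = rq_list[0];
                drm_sched_rq_add_entity(entity->rq, entity);
        }

        spin_unlock(&entity->rq_lock);
}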

Then we could embed the fences in amdgpu_ctx_entity as a dynamic array at the end of the structure.
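
Roughly like this (again untested, the layout and the helper are only a sketch):

struct amdgpu_ctx_entity {
        uint64_t                sequence;
        struct drm_sched_entity entity;

        /* fence ring embedded at the end, amdgpu_sched_jobs entries */
        struct dma_fence        *fences[];
};

static struct amdgpu_ctx_entity *amdgpu_ctx_entity_alloc(void)
{
        struct amdgpu_ctx_entity *entity;

        /* one allocation covers the entity and its fence ring */
        entity = kzalloc(struct_size(entity, fences, amdgpu_sched_jobs),
                         GFP_KERNEL);
        return entity;
}

That would also make the separate ctx->fences allocation superfluous.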

And last we can start to dynamically allocate and initialize the amdgpu_ctx_entity structures.
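
amdgpu_ctx_get_entity() could then simply check for a NULL pointer instead of the sequence == -1 marker below, e.g. (untested as well, parameter validation dropped for brevity):

int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
                          u32 ring, struct drm_sched_entity **entity)
{
        int r;

        /* entities[hw_ip][ring] becomes a pointer which stays NULL
         * until user space actually submits to this HW IP for the
         * first time.
         */
        if (!ctx->entities[hw_ip][ring]) {
                r = amdgpu_ctx_init_entity(ctx, hw_ip);
                if (r)
                        return r;
        }

        *entity = &ctx->entities[hw_ip][ring]->entity;
        return 0;
}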

Regards,
Christian.

On 26.11.19 at 00:31, Nirmoy wrote:
I ran amdgpu_test (drm) successfully multiple times to test this, but I am pretty sure I am missing some corner cases here.


Regards,

Nirmoy

On 11/26/19 12:17 AM, Nirmoy Das wrote:
Currently we pre-allocate entities for all the HW IPs on
context creation, some of which might never be used.

This patch tries to resolve entity wastage by creating entities
for a HW IP only when it is required.

Signed-off-by: Nirmoy Das <nirmoy.das@xxxxxxx>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 142 ++++++++++++++----------
  1 file changed, 81 insertions(+), 61 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index a0d3d7b756eb..0a390ebe873f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -74,7 +74,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
                 struct amdgpu_ctx *ctx)
  {
      unsigned num_entities = amdgpu_ctx_total_num_entities();
-    unsigned i, j, k;
+    unsigned i;
      int r;

      if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
@@ -103,7 +103,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
      for (i = 0; i < num_entities; ++i) {
          struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];

-        entity->sequence = 1;
+        entity->sequence = -1;
          entity->fences = &ctx->fences[amdgpu_sched_jobs * i];
      }
      for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
@@ -120,25 +120,59 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
      ctx->init_priority = priority;
      ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;

-    for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
-        struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
-        struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
-        unsigned num_rings = 0;
-        unsigned num_rqs = 0;
+    return 0;
+
+error_free_fences:
+    kfree(ctx->fences);
+    ctx->fences = NULL;
+    return r;
+}
+
+static void amdgpu_ctx_fini(struct kref *ref)
+{
+    struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
+    unsigned num_entities = amdgpu_ctx_total_num_entities();
+    struct amdgpu_device *adev = ctx->adev;
+    unsigned i, j;
+
+    if (!adev)
+        return;
+
+    for (i = 0; i < num_entities; ++i)
+        for (j = 0; j < amdgpu_sched_jobs; ++j)
+            dma_fence_put(ctx->entities[0][i].fences[j]);
+    kfree(ctx->fences);
+    kfree(ctx->entities[0]);
+
+    mutex_destroy(&ctx->lock);
+
+    kfree(ctx);
+}

-        switch (i) {
+
+int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip)
This should be a static function; I forgot to change it.
+{
+    struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
+    struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
+    struct amdgpu_device *adev = ctx->adev;
+    unsigned num_rings = 0;
+    unsigned num_rqs = 0;
+    unsigned i, j;
+    int r = 0;
+
+    switch (hw_ip) {
          case AMDGPU_HW_IP_GFX:
              rings[0] = &adev->gfx.gfx_ring[0];
              num_rings = 1;
              break;
          case AMDGPU_HW_IP_COMPUTE:
-            for (j = 0; j < adev->gfx.num_compute_rings; ++j)
-                rings[j] = &adev->gfx.compute_ring[j];
+            for (i = 0; i < adev->gfx.num_compute_rings; ++i)
+                rings[i] = &adev->gfx.compute_ring[i];
              num_rings = adev->gfx.num_compute_rings;
              break;
          case AMDGPU_HW_IP_DMA:
-            for (j = 0; j < adev->sdma.num_instances; ++j)
-                rings[j] = &adev->sdma.instance[j].ring;
+            for (i = 0; i < adev->sdma.num_instances; ++i)
+                rings[i] = &adev->sdma.instance[i].ring;
              num_rings = adev->sdma.num_instances;
              break;
          case AMDGPU_HW_IP_UVD:
@@ -154,80 +188,59 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
              num_rings = 1;
              break;
          case AMDGPU_HW_IP_VCN_DEC:
-            for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
-                if (adev->vcn.harvest_config & (1 << j))
+            for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+                if (adev->vcn.harvest_config & (1 << i))
                      continue;
-                rings[num_rings++] = &adev->vcn.inst[j].ring_dec;
+                rings[num_rings++] = &adev->vcn.inst[i].ring_dec;
              }
              break;
          case AMDGPU_HW_IP_VCN_ENC:
-            for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
-                if (adev->vcn.harvest_config & (1 << j))
+            for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+                if (adev->vcn.harvest_config & (1 << i))
                      continue;
-                for (k = 0; k < adev->vcn.num_enc_rings; ++k)
-                    rings[num_rings++] = &adev->vcn.inst[j].ring_enc[k];
+                for (j = 0; j < adev->vcn.num_enc_rings; ++j)
+                    rings[num_rings++] = &adev->vcn.inst[i].ring_enc[j];
              }
              break;
          case AMDGPU_HW_IP_VCN_JPEG:
-            for (j = 0; j < adev->jpeg.num_jpeg_inst; ++j) {
-                if (adev->vcn.harvest_config & (1 << j))
+            for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+                if (adev->vcn.harvest_config & (1 << i))
                      continue;
-                rings[num_rings++] = &adev->jpeg.inst[j].ring_dec;
+                rings[num_rings++] = &adev->jpeg.inst[i].ring_dec;
              }
              break;
-        }
-
-        for (j = 0; j < num_rings; ++j) {
-            if (!rings[j]->adev)
-                continue;
+    }

-            rqs[num_rqs++] = &rings[j]->sched.sched_rq[priority];
-        }
+    for (i = 0; i < num_rings; ++i) {
+        if (!rings[i]->adev)
+            continue;

-        for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
-            r = drm_sched_entity_init(&ctx->entities[i][j].entity,
-                          rqs, num_rqs, &ctx->guilty);
-        if (r)
-            goto error_cleanup_entities;
+        rqs[num_rqs++] = &rings[i]->sched.sched_rq[ctx->init_priority];
      }

+    for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i)
+        r = drm_sched_entity_init(&ctx->entities[hw_ip][i].entity,
+                rqs, num_rqs, &ctx->guilty);
+    if (r)
+        goto error_cleanup_entities;
+
+    for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i)
+        ctx->entities[hw_ip][i].sequence = 1;
+
      return 0;

  error_cleanup_entities:
-    for (i = 0; i < num_entities; ++i)
-        drm_sched_entity_destroy(&ctx->entities[0][i].entity);
-    kfree(ctx->entities[0]);
+    for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i)
+        drm_sched_entity_destroy(&ctx->entities[hw_ip][i].entity);

-error_free_fences:
-    kfree(ctx->fences);
-    ctx->fences = NULL;
      return r;
  }

-static void amdgpu_ctx_fini(struct kref *ref)
-{
-    struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
-    unsigned num_entities = amdgpu_ctx_total_num_entities();
-    struct amdgpu_device *adev = ctx->adev;
-    unsigned i, j;
-
-    if (!adev)
-        return;
-
-    for (i = 0; i < num_entities; ++i)
-        for (j = 0; j < amdgpu_sched_jobs; ++j)
-            dma_fence_put(ctx->entities[0][i].fences[j]);
-    kfree(ctx->fences);
-    kfree(ctx->entities[0]);
-
-    mutex_destroy(&ctx->lock);
-
-    kfree(ctx);
-}
-
  int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
                u32 ring, struct drm_sched_entity **entity)
  {
+    int r;
+
      if (hw_ip >= AMDGPU_HW_IP_NUM) {
          DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
          return -EINVAL;
@@ -244,6 +257,13 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
          return -EINVAL;
      }

+    if (ctx->entities[hw_ip][ring].sequence == -1) {
+        r = amdgpu_ctx_init_entity(ctx, hw_ip);
+
+        if (r)
+            return r;
+    }
+
      *entity = &ctx->entities[hw_ip][ring].entity;
      return 0;
  }
_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
