On 2018-08-16 16:11, Christian König wrote:
> On 16.08.2018 04:07, zhoucm1 wrote:
>>
>>
>> On 2018-08-15 18:59, Christian König wrote:
>>> Use a fixed number of entities for each hardware IP.
>>>
>>> The number of compute entities is reduced to four, SDMA keeps its two
>>> entities and all other engines just expose one entity.
>>>
>>> Signed-off-by: Christian König <christian.koenig at amd.com>
>>> ---
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 291
>>> ++++++++++++++++----------------
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 30 ++--
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 36 ++--
>>>  3 files changed, 190 insertions(+), 167 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>>> index 0a6cd1202ee5..987b7f256463 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>>> @@ -27,8 +27,29 @@
>>>  #include "amdgpu.h"
>>>  #include "amdgpu_sched.h"
>>>  -#define to_amdgpu_ctx_ring(e)   \
>>> -   container_of((e), struct amdgpu_ctx_ring, entity)
>>> +#define to_amdgpu_ctx_entity(e)   \
>>> +   container_of((e), struct amdgpu_ctx_entity, entity)
>>> +
>>> +const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
>>> +   [AMDGPU_HW_IP_GFX]   =   1,
>>> +   [AMDGPU_HW_IP_COMPUTE]   =   4,
>> Could you explain why it was reduced to four? Otherwise it looks good to me.
>
> Currently we change the priority of the compute queues on the fly, but
> the idea is that we will have fixed high priority and low priority
> compute queues in the future.

Yeah, I see that, feel free to add my RB:
Reviewed-by: Chunming Zhou <david1.zhou at amd.com>

Regards,
David Zhou

>
> We could as well say we have only 2 or 3 if the closed stack is fine
> with that.
>
> Regards,
> Christian.
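
A quick note on the numbers for anyone following along: with the table as posted further down, a context ends up with a fixed twelve entities in total. A rough stand-alone sketch of that arithmetic (plain userspace C; the enum below is a local stand-in for the AMDGPU_HW_IP_* values, not the UAPI header):

    /*
     * Illustration only: mirrors the fixed per-IP entity table from the
     * patch in a stand-alone program and sums it up.
     */
    #include <stdio.h>

    enum { IP_GFX, IP_COMPUTE, IP_DMA, IP_UVD, IP_VCE,
           IP_UVD_ENC, IP_VCN_DEC, IP_VCN_ENC, IP_NUM };

    static const unsigned int num_entities[IP_NUM] = {
        [IP_GFX] = 1, [IP_COMPUTE] = 4, [IP_DMA] = 2, [IP_UVD] = 1,
        [IP_VCE] = 1, [IP_UVD_ENC] = 1, [IP_VCN_DEC] = 1, [IP_VCN_ENC] = 1,
    };

    int main(void)
    {
        unsigned int i, total = 0;

        for (i = 0; i < IP_NUM; ++i)
            total += num_entities[i];
        /* 1 + 4 + 2 + 1 + 1 + 1 + 1 + 1 = 12 entities per context */
        printf("entities per context: %u\n", total);
        return 0;
    }
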
>
>>
>> Thanks,
>> David Zhou
>>> +   [AMDGPU_HW_IP_DMA]   =   2,
>>> +   [AMDGPU_HW_IP_UVD]   =   1,
>>> +   [AMDGPU_HW_IP_VCE]   =   1,
>>> +   [AMDGPU_HW_IP_UVD_ENC]   =   1,
>>> +   [AMDGPU_HW_IP_VCN_DEC]   =   1,
>>> +   [AMDGPU_HW_IP_VCN_ENC]   =   1,
>>> +};
>>> +
>>> +static int amdgput_ctx_total_num_entities(void)
>>> +{
>>> +   unsigned i, num_entities = 0;
>>> +
>>> +   for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
>>> +       num_entities += amdgpu_ctx_num_entities[i];
>>> +
>>> +   return num_entities;
>>> +}
>>>   static int amdgpu_ctx_priority_permit(struct drm_file *filp,
>>>                        enum drm_sched_priority priority)
>>> @@ -51,9 +72,8 @@ static int amdgpu_ctx_init(struct amdgpu_device
>>> *adev,
>>>                 struct drm_file *filp,
>>>                 struct amdgpu_ctx *ctx)
>>>  {
>>> -   struct drm_sched_rq *sdma_rqs[AMDGPU_MAX_RINGS];
>>> -   struct drm_sched_rq *comp_rqs[AMDGPU_MAX_RINGS];
>>> -   unsigned i, j, num_sdma_rqs, num_comp_rqs;
>>> +   unsigned num_entities = amdgput_ctx_total_num_entities();
>>> +   unsigned i, j;
>>>      int r;
>>>       if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
>>> @@ -65,19 +85,33 @@ static int amdgpu_ctx_init(struct amdgpu_device
>>> *adev,
>>>       memset(ctx, 0, sizeof(*ctx));
>>>      ctx->adev = adev;
>>> -   kref_init(&ctx->refcount);
>>> -   spin_lock_init(&ctx->ring_lock);
>>> -   ctx->fences = kcalloc(amdgpu_sched_jobs * AMDGPU_MAX_RINGS,
>>> +
>>> +   ctx->fences = kcalloc(amdgpu_sched_jobs * num_entities,
>>>                    sizeof(struct dma_fence*), GFP_KERNEL);
>>>      if (!ctx->fences)
>>>          return -ENOMEM;
>>>  -   mutex_init(&ctx->lock);
>>> +   ctx->entities[0] = kcalloc(num_entities,
>>> +                  sizeof(struct amdgpu_ctx_entity),
>>> +                  GFP_KERNEL);
>>> +   if (!ctx->entities[0]) {
>>> +       r = -ENOMEM;
>>> +       goto error_free_fences;
>>> +   }
>>>  -   for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
>>> -       ctx->rings[i].sequence = 1;
>>> -       ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i];
>>> +   for (i = 0; i < num_entities; ++i) {
>>> +       struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
>>> +
>>> +       entity->sequence = 1;
>>> +       entity->fences = &ctx->fences[amdgpu_sched_jobs * i];
>>>      }
>>> +   for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
>>> +       ctx->entities[i] = ctx->entities[i - 1] +
>>> +           amdgpu_ctx_num_entities[i - 1];
>>> +
>>> +   kref_init(&ctx->refcount);
>>> +   spin_lock_init(&ctx->ring_lock);
>>> +   mutex_init(&ctx->lock);
>>>       ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
>>>      ctx->reset_counter_query = ctx->reset_counter;
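
For reference, the layout set up above is a single contiguous allocation with ctx->entities[i] pointing at the first slot of IP type i, so the same storage can be walked either flat (entities[0][n]) or per IP (entities[hw_ip][ring]). A minimal userspace sketch of that pointer arithmetic, with a shortened table and a dummy struct standing in for amdgpu_ctx_entity:

    /* Not kernel code: demonstrates the "one allocation, per-IP pointers"
     * layout used by the patch. */
    #include <stdio.h>
    #include <stdlib.h>

    enum { IP_GFX, IP_COMPUTE, IP_DMA, IP_NUM };        /* shortened table */
    static const unsigned int counts[IP_NUM] = { 1, 4, 2 };

    struct entity { unsigned int id; };

    int main(void)
    {
        unsigned int i, total = 0;
        struct entity *entities[IP_NUM];

        for (i = 0; i < IP_NUM; ++i)
            total += counts[i];

        entities[0] = calloc(total, sizeof(*entities[0]));
        if (!entities[0])
            return 1;
        for (i = 1; i < IP_NUM; ++i)
            entities[i] = entities[i - 1] + counts[i - 1];  /* prefix sums */

        for (i = 0; i < total; ++i)
            entities[0][i].id = i;                      /* flat iteration */

        /* per-IP indexing reaches the same storage: DMA ring 1 is slot 6 */
        printf("entities[IP_DMA][1].id = %u\n", entities[IP_DMA][1].id);
        free(entities[0]);
        return 0;
    }
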
>>> @@ -85,50 +119,70 @@ static int amdgpu_ctx_init(struct amdgpu_device
>>> *adev,
>>>      ctx->init_priority = priority;
>>>      ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
>>>  -   num_sdma_rqs = 0;
>>> -   num_comp_rqs = 0;
>>> -   for (i = 0; i < adev->num_rings; i++) {
>>> -       struct amdgpu_ring *ring = adev->rings[i];
>>> -       struct drm_sched_rq *rq;
>>> -
>>> -       rq = &ring->sched.sched_rq[priority];
>>> -       if (ring->funcs->type == AMDGPU_RING_TYPE_SDMA)
>>> -           sdma_rqs[num_sdma_rqs++] = rq;
>>> -       else if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
>>> -           comp_rqs[num_comp_rqs++] = rq;
>>> -   }
>>> -
>>> -   /* create context entity for each ring */
>>> -   for (i = 0; i < adev->num_rings; i++) {
>>> -       struct amdgpu_ring *ring = adev->rings[i];
>>> +   for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
>>> +       struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
>>> +       struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
>>> +       unsigned num_rings;
>>> +
>>> +       switch (i) {
>>> +       case AMDGPU_HW_IP_GFX:
>>> +           rings[0] = &adev->gfx.gfx_ring[0];
>>> +           num_rings = 1;
>>> +           break;
>>> +       case AMDGPU_HW_IP_COMPUTE:
>>> +           for (j = 0; j < adev->gfx.num_compute_rings; ++j)
>>> +               rings[j] = &adev->gfx.compute_ring[j];
>>> +           num_rings = adev->gfx.num_compute_rings;
>>> +           break;
>>> +       case AMDGPU_HW_IP_DMA:
>>> +           for (j = 0; j < adev->sdma.num_instances; ++j)
>>> +               rings[j] = &adev->sdma.instance[j].ring;
>>> +           num_rings = adev->sdma.num_instances;
>>> +           break;
>>> +       case AMDGPU_HW_IP_UVD:
>>> +           rings[0] = &adev->uvd.inst[0].ring;
>>> +           num_rings = 1;
>>> +           break;
>>> +       case AMDGPU_HW_IP_VCE:
>>> +           rings[0] = &adev->vce.ring[0];
>>> +           num_rings = 1;
>>> +           break;
>>> +       case AMDGPU_HW_IP_UVD_ENC:
>>> +           rings[0] = &adev->uvd.inst[0].ring_enc[0];
>>> +           num_rings = 1;
>>> +           break;
>>> +       case AMDGPU_HW_IP_VCN_DEC:
>>> +           rings[0] = &adev->vcn.ring_dec;
>>> +           num_rings = 1;
>>> +           break;
>>> +       case AMDGPU_HW_IP_VCN_ENC:
>>> +           rings[0] = &adev->vcn.ring_enc[0];
>>> +           num_rings = 1;
>>> +           break;
>>> +       case AMDGPU_HW_IP_VCN_JPEG:
>>> +           rings[0] = &adev->vcn.ring_jpeg;
>>> +           num_rings = 1;
>>> +           break;
>>> +       }
>>>  -       if (ring == &adev->gfx.kiq.ring)
>>> -           continue;
>>> +       for (j = 0; j < num_rings; ++j)
>>> +           rqs[j] = &rings[j]->sched.sched_rq[priority];
>>>  -       if (ring->funcs->type == AMDGPU_RING_TYPE_SDMA) {
>>> -           r = drm_sched_entity_init(&ctx->rings[i].entity,
>>> -                         sdma_rqs, num_sdma_rqs,
>>> -                         &ctx->guilty);
>>> -       } else if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
>>> -           r = drm_sched_entity_init(&ctx->rings[i].entity,
>>> -                         comp_rqs, num_comp_rqs,
>>> -                         &ctx->guilty);
>>> -       } else {
>>> -           struct drm_sched_rq *rq;
>>> -
>>> -           rq = &ring->sched.sched_rq[priority];
>>> -           r = drm_sched_entity_init(&ctx->rings[i].entity,
>>> -                         &rq, 1, &ctx->guilty);
>>> -       }
>>> +       for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
>>> +           r = drm_sched_entity_init(&ctx->entities[i][j].entity,
>>> +                         rqs, num_rings, &ctx->guilty);
>>>          if (r)
>>> -           goto failed;
>>> +           goto error_cleanup_entities;
>>>      }
>>>       return 0;
>>>  -failed:
>>> -   for (j = 0; j < i; j++)
>>> - drm_sched_entity_destroy(&ctx->rings[j].entity);
>>> +error_cleanup_entities:
>>> +   for (i = 0; i < num_entities; ++i)
>>> + drm_sched_entity_destroy(&ctx->entities[0][i].entity);
>>> +   kfree(ctx->entities[0]);
>>> +
>>> +error_free_fences:
>>>      kfree(ctx->fences);
>>>      ctx->fences = NULL;
>>>      return r;
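
Worth spelling out what the loop above hands to drm_sched_entity_init(): every entity of a given IP type is backed by the run queues of all rings of that type, so the scheduler is free to place a job on any of them. A toy illustration of that mapping (userspace C; the ring counts are assumed example values, not taken from the patch):

    /* Illustration only: per-IP entity counts come from the patch's table,
     * ring counts are a made-up hardware configuration. */
    #include <stdio.h>

    int main(void)
    {
        const char *ip[]              = { "GFX", "COMPUTE", "DMA", "UVD" };
        const unsigned int entities[] = { 1, 4, 2, 1 };  /* from the table */
        const unsigned int rings[]    = { 1, 8, 2, 1 };  /* assumed config */
        unsigned int i;

        for (i = 0; i < 4; ++i)
            printf("%-8s %u entities, each backed by %u run queue(s)\n",
                   ip[i], entities[i], rings[i]);
        return 0;
    }
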
>>> @@ -137,17 +191,18 @@ static int amdgpu_ctx_init(struct
>>> amdgpu_device *adev,
>>>  static void amdgpu_ctx_fini(struct kref *ref)
>>>  {
>>>      struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx,
>>> refcount);
>>> +   unsigned num_entities = amdgput_ctx_total_num_entities();
>>>      struct amdgpu_device *adev = ctx->adev;
>>>      unsigned i, j;
>>>       if (!adev)
>>>          return;
>>>  -   for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
>>>          for (j = 0; j < amdgpu_sched_jobs; ++j)
>>> -           dma_fence_put(ctx->rings[i].fences[j]);
>>> +           dma_fence_put(ctx->entities[0][i].fences[j]);
>>>      kfree(ctx->fences);
>>> -   ctx->fences = NULL;
>>> +   kfree(ctx->entities[0]);
>>>       mutex_destroy(&ctx->lock);
>>>  @@ -157,9 +212,10 @@ static void amdgpu_ctx_fini(struct kref *ref)
>>>  int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32
>>> instance,
>>>                u32 ring, struct drm_sched_entity **entity)
>>>  {
>>> -   struct amdgpu_device *adev = ctx->adev;
>>> -   unsigned num_rings = 0;
>>> -   struct amdgpu_ring *out_ring;
>>> +   if (hw_ip >= AMDGPU_HW_IP_NUM) {
>>> +       DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
>>> +       return -EINVAL;
>>> +   }
>>>       /* Right now all IPs have only one instance - multiple
>>> rings. */
>>>      if (instance != 0) {
>>> @@ -167,52 +223,12 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx
>>> *ctx, u32 hw_ip, u32 instance,
>>>          return -EINVAL;
>>>      }
>>>  -   switch (hw_ip) {
>>> -   case AMDGPU_HW_IP_GFX:
>>> -       out_ring = &adev->gfx.gfx_ring[ring];
>>> -       num_rings = adev->gfx.num_gfx_rings;
>>> -       break;
>>> -   case AMDGPU_HW_IP_COMPUTE:
>>> -       out_ring = &adev->gfx.compute_ring[ring];
>>> -       num_rings = adev->gfx.num_compute_rings;
>>> -       break;
>>> -   case AMDGPU_HW_IP_DMA:
>>> -       out_ring = &adev->sdma.instance[ring].ring;
>>> -       num_rings = adev->sdma.num_instances;
>>> -       break;
>>> -   case AMDGPU_HW_IP_UVD:
>>> -       out_ring = &adev->uvd.inst[0].ring;
>>> -       num_rings = adev->uvd.num_uvd_inst;
>>> -       break;
>>> -   case AMDGPU_HW_IP_VCE:
>>> -       out_ring = &adev->vce.ring[ring];
>>> -       num_rings = adev->vce.num_rings;
>>> -       break;
>>> -   case AMDGPU_HW_IP_UVD_ENC:
>>> -       out_ring = &adev->uvd.inst[0].ring_enc[ring];
>>> -       num_rings = adev->uvd.num_enc_rings;
>>> -       break;
>>> -   case AMDGPU_HW_IP_VCN_DEC:
>>> -       out_ring = &adev->vcn.ring_dec;
>>> -       num_rings = 1;
>>> -       break;
>>> -   case AMDGPU_HW_IP_VCN_ENC:
>>> -       out_ring = &adev->vcn.ring_enc[ring];
>>> -       num_rings = adev->vcn.num_enc_rings;
>>> -       break;
>>> -   case AMDGPU_HW_IP_VCN_JPEG:
>>> -       out_ring = &adev->vcn.ring_jpeg;
>>> -       num_rings = 1;
>>> -       break;
>>> -   default:
>>> -       DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
>>> +   if (ring >= amdgpu_ctx_num_entities[hw_ip]) {
>>> +       DRM_DEBUG("invalid ring: %d %d\n", hw_ip, ring);
>>>          return -EINVAL;
>>>      }
>>>  -   if (ring > num_rings)
>>> -       return -EINVAL;
>>> -
>>> -   *entity = &ctx->rings[out_ring->idx].entity;
>>> +   *entity = &ctx->entities[hw_ip][ring].entity;
>>>      return 0;
>>>  }
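
The lookup above is now just bounds checks against the fixed table plus a direct index, instead of the old per-ASIC switch. A stand-alone sketch of the equivalent validation, using a shortened local table (hypothetical helper, not the kernel function):

    /* Illustration only: mirrors the new hw_ip/instance/ring validation. */
    #include <stdio.h>

    enum { IP_GFX, IP_COMPUTE, IP_DMA, IP_NUM };
    static const unsigned int num_entities[IP_NUM] = { 1, 4, 2 };

    /* returns the flat entity slot, or -1 on invalid input */
    static int get_entity_slot(unsigned int hw_ip, unsigned int instance,
                               unsigned int ring)
    {
        unsigned int i, base = 0;

        if (hw_ip >= IP_NUM || instance != 0 || ring >= num_entities[hw_ip])
            return -1;
        for (i = 0; i < hw_ip; ++i)
            base += num_entities[i];
        return base + ring;
    }

    int main(void)
    {
        printf("%d\n", get_entity_slot(IP_COMPUTE, 0, 3)); /* 4 */
        printf("%d\n", get_entity_slot(IP_COMPUTE, 0, 7)); /* -1: out of range */
        printf("%d\n", get_entity_slot(IP_DMA, 0, 1));     /* 6 */
        return 0;
    }
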
>>>  @@ -252,17 +268,17 @@ static int amdgpu_ctx_alloc(struct
>>> amdgpu_device *adev,
>>>  static void amdgpu_ctx_do_release(struct kref *ref)
>>>  {
>>>      struct amdgpu_ctx *ctx;
>>> +   unsigned num_entities;
>>>      u32 i;
>>>       ctx = container_of(ref, struct amdgpu_ctx, refcount);
>>>  -   for (i = 0; i < ctx->adev->num_rings; i++) {
>>> +   num_entities = 0;
>>> +   for (i = 0; i < AMDGPU_HW_IP_NUM; i++)
>>> +       num_entities += amdgpu_ctx_num_entities[i];
>>>  -       if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
>>> -           continue;
>>> -
>>> - drm_sched_entity_destroy(&ctx->rings[i].entity);
>>> -   }
>>> +   for (i = 0; i < num_entities; i++)
>>> + drm_sched_entity_destroy(&ctx->entities[0][i].entity);
>>>       amdgpu_ctx_fini(ref);
>>>  }
>>> @@ -422,21 +438,21 @@ int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
>>>               struct drm_sched_entity *entity,
>>>               struct dma_fence *fence, uint64_t* handle)
>>>  {
>>> -   struct amdgpu_ctx_ring *cring = to_amdgpu_ctx_ring(entity);
>>> -   uint64_t seq = cring->sequence;
>>> +   struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
>>> +   uint64_t seq = centity->sequence;
>>>      struct dma_fence *other = NULL;
>>>      unsigned idx = 0;
>>>       idx = seq & (amdgpu_sched_jobs - 1);
>>> -   other = cring->fences[idx];
>>> +   other = centity->fences[idx];
>>>      if (other)
>>>          BUG_ON(!dma_fence_is_signaled(other));
>>>       dma_fence_get(fence);
>>>       spin_lock(&ctx->ring_lock);
>>> -   cring->fences[idx] = fence;
>>> -   cring->sequence++;
>>> +   centity->fences[idx] = fence;
>>> +   centity->sequence++;
>>>      spin_unlock(&ctx->ring_lock);
>>>       dma_fence_put(other);
>>> @@ -450,26 +466,26 @@ struct dma_fence *amdgpu_ctx_get_fence(struct
>>> amdgpu_ctx *ctx,
>>>                         struct drm_sched_entity *entity,
>>>                         uint64_t seq)
>>>  {
>>> -   struct amdgpu_ctx_ring *cring = to_amdgpu_ctx_ring(entity);
>>> +   struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
>>>      struct dma_fence *fence;
>>>       spin_lock(&ctx->ring_lock);
>>>       if (seq == ~0ull)
>>> -       seq = cring->sequence - 1;
>>>  -   if (seq >= cring->sequence) {
>>> +   if (seq >= centity->sequence) {
>>>          spin_unlock(&ctx->ring_lock);
>>>          return ERR_PTR(-EINVAL);
>>>      }
>>>   -   if (seq + amdgpu_sched_jobs < cring->sequence) {
>>> +   if (seq + amdgpu_sched_jobs < centity->sequence) {
>>>          spin_unlock(&ctx->ring_lock);
>>>          return NULL;
>>>      }
>>>  -   fence = dma_fence_get(cring->fences[seq & (amdgpu_sched_jobs
>>> - 1)]);
>>> +   fence = dma_fence_get(centity->fences[seq & (amdgpu_sched_jobs
>>> - 1)]);
>>>      spin_unlock(&ctx->ring_lock);
>>>       return fence;
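
The fence bookkeeping above is unchanged in principle by the rename: each entity keeps a power-of-two ring buffer of amdgpu_sched_jobs fences indexed by seq & (amdgpu_sched_jobs - 1), and get_fence() rejects sequence numbers that have not been submitted yet or that have already been overwritten. A rough userspace model of that window logic (jobs = 32 is an assumed value; plain integers stand in for dma_fence pointers):

    /* Illustration only, not the kernel implementation. */
    #include <stdio.h>
    #include <stdint.h>

    #define JOBS 32u                       /* must be a power of two */

    static uint64_t fences[JOBS];
    static uint64_t sequence = 1;          /* next sequence to be assigned */

    static void add_fence(uint64_t fence)
    {
        fences[sequence & (JOBS - 1)] = fence;
        sequence++;
    }

    /* 0 means "no longer tracked" (slot already overwritten) */
    static uint64_t get_fence(uint64_t seq)
    {
        if (seq >= sequence)               /* not submitted yet: invalid */
            return (uint64_t)-1;
        if (seq + JOBS < sequence)         /* older than the ring: gone */
            return 0;
        return fences[seq & (JOBS - 1)];
    }

    int main(void)
    {
        uint64_t s;

        for (s = 1; s <= 40; ++s)
            add_fence(0x1000 + s);
        printf("%llx\n", (unsigned long long)get_fence(40)); /* 0x1028 */
        printf("%llx\n", (unsigned long long)get_fence(5));  /* 0: overwritten */
        return 0;
    }
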
>>> @@ -478,23 +494,17 @@ struct dma_fence *amdgpu_ctx_get_fence(struct
>>> amdgpu_ctx *ctx,
>>>  void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
>>>                    enum drm_sched_priority priority)
>>>  {
>>> -   int i;
>>> -   struct amdgpu_device *adev = ctx->adev;
>>> -   struct drm_sched_entity *entity;
>>> -   struct amdgpu_ring *ring;
>>> +   unsigned num_entities = amdgput_ctx_total_num_entities();
>>>      enum drm_sched_priority ctx_prio;
>>> +   unsigned i;
>>>       ctx->override_priority = priority;
>>>       ctx_prio = (ctx->override_priority ==
>>> DRM_SCHED_PRIORITY_UNSET) ?
>>>              ctx->init_priority : ctx->override_priority;
>>>  -   for (i = 0; i < adev->num_rings; i++) {
>>> -       ring = adev->rings[i];
>>> -       entity = &ctx->rings[i].entity;
>>> -
>>> -       if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
>>> -           continue;
>>> +   for (i = 0; i < num_entities; i++) {
>>> +       struct drm_sched_entity *entity = &ctx->entities[0][i].entity;
>>>           drm_sched_entity_set_priority(entity, ctx_prio);
>>>      }
>>> @@ -503,9 +513,9 @@ void amdgpu_ctx_priority_override(struct
>>> amdgpu_ctx *ctx,
>>>  int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
>>>                     struct drm_sched_entity *entity)
>>>  {
>>> -   struct amdgpu_ctx_ring *cring = to_amdgpu_ctx_ring(entity);
>>> -   unsigned idx = cring->sequence & (amdgpu_sched_jobs - 1);
>>> -   struct dma_fence *other = cring->fences[idx];
>>> +   struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
>>> +   unsigned idx = centity->sequence & (amdgpu_sched_jobs - 1);
>>> +   struct dma_fence *other = centity->fences[idx];
>>>       if (other) {
>>>          signed long r;
>>> @@ -529,6 +539,7 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr
>>> *mgr)
>>>   void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr)
>>>  {
>>> +   unsigned num_entities = amdgput_ctx_total_num_entities();
>>>      struct amdgpu_ctx *ctx;
>>>      struct idr *idp;
>>>      uint32_t id, i;
>>> @@ -544,13 +555,11 @@ void amdgpu_ctx_mgr_entity_flush(struct
>>> amdgpu_ctx_mgr *mgr)
>>>              return;
>>>          }
>>>  -       for (i = 0; i < ctx->adev->num_rings; i++) {
>>> +       for (i = 0; i < num_entities; i++) {
>>> +           struct drm_sched_entity *entity;
>>>  -           if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
>>> -               continue;
>>> -
>>> -           max_wait = drm_sched_entity_flush(&ctx->rings[i].entity,
>>> -                             max_wait);
>>> +           entity = &ctx->entities[0][i].entity;
>>> +           max_wait = drm_sched_entity_flush(entity, max_wait);
>>>          }
>>>      }
>>>      mutex_unlock(&mgr->lock);
>>> @@ -558,6 +567,7 @@ void amdgpu_ctx_mgr_entity_flush(struct
>>> amdgpu_ctx_mgr *mgr)
>>>   void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
>>>  {
>>> +   unsigned num_entities = amdgput_ctx_total_num_entities();
>>>      struct amdgpu_ctx *ctx;
>>>      struct idr *idp;
>>>      uint32_t id, i;
>>> @@ -569,16 +579,13 @@ void amdgpu_ctx_mgr_entity_fini(struct
>>> amdgpu_ctx_mgr *mgr)
>>>          if (!ctx->adev)
>>>              return;
>>>  -       for (i = 0; i < ctx->adev->num_rings; i++) {
>>> -
>>> -           if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
>>> -               continue;
>>> -
>>> -           if (kref_read(&ctx->refcount) == 1)
>>> - drm_sched_entity_fini(&ctx->rings[i].entity);
>>> -           else
>>> -               DRM_ERROR("ctx %p is still alive\n", ctx);
>>> +       if (kref_read(&ctx->refcount) != 1) {
>>> +           DRM_ERROR("ctx %p is still alive\n", ctx);
>>> +           continue;
>>>          }
>>> +
>>> +       for (i = 0; i < num_entities; i++)
>>> + drm_sched_entity_fini(&ctx->entities[0][i].entity);
>>>      }
>>>  }
>>>  diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>>> index 609f925b076c..d67c1d285a4f 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>>> @@ -29,26 +29,26 @@ struct drm_device;
>>>  struct drm_file;
>>>  struct amdgpu_fpriv;
>>>  -struct amdgpu_ctx_ring {
>>> +struct amdgpu_ctx_entity {
>>>      uint64_t       sequence;
>>>      struct dma_fence   **fences;
>>>      struct drm_sched_entity   entity;
>>>  };
>>>   struct amdgpu_ctx {
>>> -   struct kref       refcount;
>>> -   struct amdgpu_device   *adev;
>>> -   unsigned       reset_counter;
>>> -   unsigned       reset_counter_query;
>>> -   uint32_t       vram_lost_counter;
>>> -   spinlock_t       ring_lock;
>>> -   struct dma_fence   **fences;
>>> -   struct amdgpu_ctx_ring   rings[AMDGPU_MAX_RINGS];
>>> -   bool           preamble_presented;
>>> -   enum drm_sched_priority init_priority;
>>> -   enum drm_sched_priority override_priority;
>>> -   struct mutex           lock;
>>> -   atomic_t       guilty;
>>> +   struct kref           refcount;
>>> +   struct amdgpu_device       *adev;
>>> +   unsigned           reset_counter;
>>> +   unsigned           reset_counter_query;
>>> +   uint32_t           vram_lost_counter;
>>> +   spinlock_t           ring_lock;
>>> +   struct dma_fence       **fences;
>>> +   struct amdgpu_ctx_entity   *entities[AMDGPU_HW_IP_NUM];
>>> +   bool               preamble_presented;
>>> +   enum drm_sched_priority       init_priority;
>>> +   enum drm_sched_priority       override_priority;
>>> +   struct mutex           lock;
>>> +   atomic_t           guilty;
>>>  };
>>>   struct amdgpu_ctx_mgr {
>>> @@ -58,6 +58,8 @@ struct amdgpu_ctx_mgr {
>>>      struct idr       ctx_handles;
>>>  };
>>>  +extern const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM];
>>> +
>>>  struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv,
>>> uint32_t id);
>>>  int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
>>>  diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>> index 40fd591c9980..82177baade96 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>> @@ -270,7 +270,7 @@ static int amdgpu_hw_ip_info(struct
>>> amdgpu_device *adev,
>>>      uint32_t ib_start_alignment = 0;
>>>      uint32_t ib_size_alignment = 0;
>>>      enum amd_ip_block_type type;
>>> -   uint32_t ring_mask = 0;
>>> +   unsigned int num_rings = 0;
>>>      unsigned int i, j;
>>>       if (info->query_hw_ip.ip_instance >=
>>> AMDGPU_HW_IP_INSTANCE_MAX_COUNT)
>>> @@ -280,21 +280,24 @@ static int amdgpu_hw_ip_info(struct
>>> amdgpu_device *adev,
>>>      case AMDGPU_HW_IP_GFX:
>>>          type = AMD_IP_BLOCK_TYPE_GFX;
>>>          for (i = 0; i < adev->gfx.num_gfx_rings; i++)
>>> -           ring_mask |= adev->gfx.gfx_ring[i].ready << i;
>>> +           if (adev->gfx.gfx_ring[i].ready)
>>> +               ++num_rings;
>>>          ib_start_alignment = 32;
>>>          ib_size_alignment = 32;
>>>          break;
>>>      case AMDGPU_HW_IP_COMPUTE:
>>>          type = AMD_IP_BLOCK_TYPE_GFX;
>>>          for (i = 0; i < adev->gfx.num_compute_rings; i++)
>>> -           ring_mask |= adev->gfx.compute_ring[i].ready << i;
>>> +           if (adev->gfx.compute_ring[i].ready)
>>> +               ++num_rings;
>>>          ib_start_alignment = 32;
>>>          ib_size_alignment = 32;
>>>          break;
>>>      case AMDGPU_HW_IP_DMA:
>>>          type = AMD_IP_BLOCK_TYPE_SDMA;
>>>          for (i = 0; i < adev->sdma.num_instances; i++)
>>> -           ring_mask |= adev->sdma.instance[i].ring.ready << i;
>>> +           if (adev->sdma.instance[i].ring.ready)
>>> +               ++num_rings;
>>>          ib_start_alignment = 256;
>>>          ib_size_alignment = 4;
>>>          break;
>>> @@ -303,7 +306,9 @@ static int amdgpu_hw_ip_info(struct
>>> amdgpu_device *adev,
>>>          for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
>>>              if (adev->uvd.harvest_config & (1 << i))
>>>                  continue;
>>> -           ring_mask |= adev->uvd.inst[i].ring.ready;
>>> +
>>> +           if (adev->uvd.inst[i].ring.ready)
>>> +               ++num_rings;
>>>          }
>>>          ib_start_alignment = 64;
>>>          ib_size_alignment = 64;
>>> @@ -311,7 +316,8 @@ static int amdgpu_hw_ip_info(struct
>>> amdgpu_device *adev,
>>>      case AMDGPU_HW_IP_VCE:
>>>          type = AMD_IP_BLOCK_TYPE_VCE;
>>>          for (i = 0; i < adev->vce.num_rings; i++)
>>> -           ring_mask |= adev->vce.ring[i].ready << i;
>>> +           if (adev->vce.ring[i].ready)
>>> +               ++num_rings;
>>>          ib_start_alignment = 4;
>>>          ib_size_alignment = 1;
>>>          break;
>>> @@ -320,28 +326,33 @@ static int amdgpu_hw_ip_info(struct
>>> amdgpu_device *adev,
>>>          for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
>>>              if (adev->uvd.harvest_config & (1 << i))
>>>                  continue;
>>> +
>>>              for (j = 0; j < adev->uvd.num_enc_rings; j++)
>>> -               ring_mask |= adev->uvd.inst[i].ring_enc[j].ready << j;
>>> +               if (adev->uvd.inst[i].ring_enc[j].ready)
>>> +                   ++num_rings;
>>>          }
>>>          ib_start_alignment = 64;
>>>          ib_size_alignment = 64;
>>>          break;
>>>      case AMDGPU_HW_IP_VCN_DEC:
>>>          type = AMD_IP_BLOCK_TYPE_VCN;
>>> -       ring_mask = adev->vcn.ring_dec.ready;
>>> +       if (adev->vcn.ring_dec.ready)
>>> +           ++num_rings;
>>>          ib_start_alignment = 16;
>>>          ib_size_alignment = 16;
>>>          break;
>>>      case AMDGPU_HW_IP_VCN_ENC:
>>>          type = AMD_IP_BLOCK_TYPE_VCN;
>>>          for (i = 0; i < adev->vcn.num_enc_rings; i++)
>>> -           ring_mask |= adev->vcn.ring_enc[i].ready << i;
>>> +           if (adev->vcn.ring_enc[i].ready)
>>> +               ++num_rings;
>>>          ib_start_alignment = 64;
>>>          ib_size_alignment = 1;
>>>          break;
>>>      case AMDGPU_HW_IP_VCN_JPEG:
>>>          type = AMD_IP_BLOCK_TYPE_VCN;
>>> -       ring_mask = adev->vcn.ring_jpeg.ready;
>>> +       if (adev->vcn.ring_jpeg.ready)
>>> +           ++num_rings;
>>>          ib_start_alignment = 16;
>>>          ib_size_alignment = 16;
>>>          break;
>>> @@ -357,10 +368,13 @@ static int amdgpu_hw_ip_info(struct
>>> amdgpu_device *adev,
>>>      if (i == adev->num_ip_blocks)
>>>          return 0;
>>>  +   num_rings = min(amdgpu_ctx_num_entities[info->query_hw_ip.type],
>>> +           num_rings);
>>> +
>>>      result->hw_ip_version_major = adev->ip_blocks[i].version->major;
>>>      result->hw_ip_version_minor = adev->ip_blocks[i].version->minor;
>>>      result->capabilities_flags = 0;
>>> -   result->available_rings = ring_mask;
>>> +   result->available_rings = (1 << num_rings) - 1;
>>>      result->ib_start_alignment = ib_start_alignment;
>>>      result->ib_size_alignment = ib_size_alignment;
>>>      return 0;
>>
>
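
One user-visible consequence of the amdgpu_kms.c change quoted above: available_rings is now a contiguous mask of min(ready rings, amdgpu_ctx_num_entities[type]) bits instead of a bitmask of physical ring indices, so userspace always sees rings 0..n-1 as usable. A small illustration (stand-alone C; the "8 ready compute rings" is an assumed example value):

    /* Illustration only of the new available_rings computation. */
    #include <stdio.h>

    int main(void)
    {
        unsigned int table_compute = 4;  /* amdgpu_ctx_num_entities[COMPUTE] */
        unsigned int ready_rings   = 8;  /* assumed number of ready HW rings */
        unsigned int num_rings     = ready_rings < table_compute ?
                                     ready_rings : table_compute;
        unsigned int available     = (1u << num_rings) - 1;

        /* userspace still sees "rings 0..3 usable", i.e. 0xf */
        printf("available_rings = 0x%x\n", available);
        return 0;
    }
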