Re: [PATCH 2/2] drm/amdgpu: load balance VCN3 decode as well v8

"Jiang, Sonny" <Sonny.Jiang@xxxxxxx> · Fri, 5 Mar 2021 13:52:13 +0000

[AMD Official Use Only - Internal Distribution Only]






Reviewed-by: Sonny Jiang <sonny.jiang@xxxxxxx>




From: Christian König <ckoenig.leichtzumerken@xxxxxxxxx>

Sent: Friday, March 5, 2021 7:51 AM

To: dri-devel@xxxxxxxxxxxxxxxxxxxxx <dri-devel@xxxxxxxxxxxxxxxxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx <amd-gfx@xxxxxxxxxxxxxxxxxxxxx>

Cc: Liu, Leo <Leo.Liu@xxxxxxx>; Jiang, Sonny <Sonny.Jiang@xxxxxxx>

Subject: [PATCH 2/2] drm/amdgpu: load balance VCN3 decode as well v8
 


Add VCN3 IB parsing to figure out to which instance we can send the

stream for decode.



v2: remove VCN instance limit as well, fix amdgpu_cs_find_mapping,

    check supported formats instead of unsupported.

v3: fix typo and error handling

v4: make sure the message BO is CPU accessible

v5: fix addr calculation once more

v6: only check message buffers

v7: fix constant and use defines

v8: fix create msg calculation



Signed-off-by: Christian König <christian.koenig@xxxxxxx>

---

 drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 132 +++++++++++++++++++++++++-

 1 file changed, 130 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c

index b33f513fd2ac..77932003b4c1 100644

--- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c

+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c

@@ -50,6 +50,9 @@

 #define VCN_INSTANCES_SIENNA_CICHLID                            2

 #define DEC_SW_RING_ENABLED                                     FALSE

 

+#define RDECODE_MSG_CREATE                                     0x00000000

+#define RDECODE_MESSAGE_CREATE                                 0x00000001

+

 static int amdgpu_ih_clientid_vcns[] = {

         SOC15_IH_CLIENTID_VCN,

         SOC15_IH_CLIENTID_VCN1

@@ -208,8 +211,6 @@ static int vcn_v3_0_sw_init(void *handle)

                 } else {

                         ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i;

                 }

-               if (adev->asic_type == CHIP_SIENNA_CICHLID && i != 0)

-                       ring->no_scheduler = true;

                 sprintf(ring->name, "vcn_dec_%d", i);

                 r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,

                                      AMDGPU_RING_PRIO_DEFAULT,

@@ -1825,6 +1826,132 @@ static const struct amdgpu_ring_funcs vcn_v3_0_dec_sw_ring_vm_funcs = {

         .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,

 };

 

+static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p)

+{

+       struct drm_gpu_scheduler **scheds;

+

+       /* The create msg must be in the first IB submitted */

+       if (atomic_read(&p->entity->fence_seq))

+               return -EINVAL;

+

+       scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_DEC]

+               [AMDGPU_RING_PRIO_DEFAULT].sched;

+       drm_sched_entity_modify_sched(p->entity, scheds, 1);

+       return 0;

+}

+

+static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr)

+{

+       struct ttm_operation_ctx ctx = { false, false };

+       struct amdgpu_bo_va_mapping *map;

+       uint32_t *msg, num_buffers;

+       struct amdgpu_bo *bo;

+       uint64_t start, end;

+       unsigned int i;

+       void * ptr;

+       int r;

+

+       addr &= AMDGPU_GMC_HOLE_MASK;

+       r = amdgpu_cs_find_mapping(p, addr, &bo, &map);

+       if (r) {

+               DRM_ERROR("Can't find BO for addr 0x%08Lx\n", addr);

+               return r;

+       }

+

+       start = map->start * AMDGPU_GPU_PAGE_SIZE;

+       end = (map->last + 1) * AMDGPU_GPU_PAGE_SIZE;

+       if (addr & 0x7) {

+               DRM_ERROR("VCN messages must be 8 byte aligned!\n");

+               return -EINVAL;

+       }

+

+       bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;

+       amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);

+       r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);

+       if (r) {

+               DRM_ERROR("Failed validating the VCN message BO (%d)!\n", r);

+               return r;

+       }

+

+       r = amdgpu_bo_kmap(bo, &ptr);

+       if (r) {

+               DRM_ERROR("Failed mapping the VCN message (%d)!\n", r);

+               return r;

+       }

+

+       msg = ptr + addr - start;

+

+       /* Check length */

+       if (msg[1] > end - addr) {

+               r = -EINVAL;

+               goto out;

+       }

+

+       if (msg[3] != RDECODE_MSG_CREATE)

+               goto out;

+

+       num_buffers = msg[2];

+       for (i = 0, msg = &msg[6]; i < num_buffers; ++i, msg += 4) {

+               uint32_t offset, size, *create;

+

+               if (msg[0] != RDECODE_MESSAGE_CREATE)

+                       continue;

+

+               offset = msg[1];

+               size = msg[2];

+

+               if (offset + size > end) {

+                       r = -EINVAL;

+                       goto out;

+               }

+

+               create = ptr + addr + offset - start;

+

+               /* H264, HEVC and VP9 can run on any instance */

+               if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11)

+                       continue;

+

+               r = vcn_v3_0_limit_sched(p);

+               if (r)

+                       goto out;

+       }

+

+out:

+       amdgpu_bo_kunmap(bo);

+       return r;

+}

+

+static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,

+                                          uint32_t ib_idx)

+{

+       struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);

+       struct amdgpu_ib *ib = &p->job->ibs[ib_idx];

+       uint32_t msg_lo = 0, msg_hi = 0;

+       unsigned i;

+       int r;

+

+       /* The first instance can decode anything */

+       if (!ring->me)

+               return 0;

+

+       for (i = 0; i < ib->length_dw; i += 2) {

+               uint32_t reg = amdgpu_get_ib_value(p, ib_idx, i);

+               uint32_t val = amdgpu_get_ib_value(p, ib_idx, i + 1);

+

+               if (reg == PACKET0(p->adev->vcn.internal.data0, 0)) {

+                       msg_lo = val;

+               } else if (reg == PACKET0(p->adev->vcn.internal.data1, 0)) {

+                       msg_hi = val;

+               } else if (reg == PACKET0(p->adev->vcn.internal.cmd, 0) &&

+                          val == 0) {

+                       r = vcn_v3_0_dec_msg(p, ((u64)msg_hi) << 32 | msg_lo);

+                       if (r)

+                               return r;

+               }

+       }

+       return 0;

+}

+

 static const struct amdgpu_ring_funcs vcn_v3_0_dec_ring_vm_funcs = {

         .type = AMDGPU_RING_TYPE_VCN_DEC,

         .align_mask = 0xf,

@@ -1832,6 +1959,7 @@ static const struct amdgpu_ring_funcs vcn_v3_0_dec_ring_vm_funcs = {

         .get_rptr = vcn_v3_0_dec_ring_get_rptr,

         .get_wptr = vcn_v3_0_dec_ring_get_wptr,

         .set_wptr = vcn_v3_0_dec_ring_set_wptr,

+       .patch_cs_in_place = vcn_v3_0_ring_patch_cs_in_place,

         .emit_frame_size =

                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +

                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +

-- 

2.25.1








_______________________________________________
dri-devel mailing list
dri-devel@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/dri-devel