Re: [PATCH v2] drm/amdgpu: limiting AV1 to first instance on VCN4 decode

"Zhu, James" <James.Zhu@xxxxxxx> · Wed, 13 Jul 2022 17:03:57 +0000

[AMD Official Use Only - General]







This patch is Reviewed-by: James Zhu <James.Zhu@xxxxxxx>













From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> on behalf of Sonny Jiang <sonny.jiang@xxxxxxx>

Sent: Wednesday, July 13, 2022 11:59 AM

To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx <amd-gfx@xxxxxxxxxxxxxxxxxxxxx>

Cc: Jiang, Sonny <Sonny.Jiang@xxxxxxx>

Subject: [PATCH v2] drm/amdgpu: limiting AV1 to first instance on VCN4 decode
 


AV1 is only supported on the first instance.



Signed-off-by: Sonny Jiang <sonny.jiang@xxxxxxx>

---

 drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 131 ++++++++++++++++++++++++++

 1 file changed, 131 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c

index 84ac2401895a..a91ffbf902d4 100644

--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c

+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c

@@ -25,6 +25,7 @@

 #include "amdgpu.h"

 #include "amdgpu_vcn.h"

 #include "amdgpu_pm.h"

+#include "amdgpu_cs.h"

 #include "soc15.h"

 #include "soc15d.h"

 #include "soc15_hw_ip.h"

@@ -44,6 +45,9 @@

 #define VCN_VID_SOC_ADDRESS_2_0                                                 0x1fb00

 #define VCN1_VID_SOC_ADDRESS_3_0                                                0x48300

 

+#define RDECODE_MSG_CREATE                                                     0x00000000

+#define RDECODE_MESSAGE_CREATE                                                 0x00000001

+

 static int amdgpu_ih_clientid_vcns[] = {

         SOC15_IH_CLIENTID_VCN,

         SOC15_IH_CLIENTID_VCN1

@@ -1323,6 +1327,132 @@ static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring)

         }

 }

 

+static int vcn_v4_0_limit_sched(struct amdgpu_cs_parser *p)

+{

+       struct drm_gpu_scheduler **scheds;

+

+       /* The create msg must be in the first IB submitted */

+       if (atomic_read(&p->entity->fence_seq))

+               return -EINVAL;

+

+       scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_ENC]

+               [AMDGPU_RING_PRIO_0].sched;

+       drm_sched_entity_modify_sched(p->entity, scheds, 1);

+       return 0;

+}

+

+static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr)

+{

+       struct ttm_operation_ctx ctx = { false, false };

+       struct amdgpu_bo_va_mapping *map;

+       uint32_t *msg, num_buffers;

+       struct amdgpu_bo *bo;

+       uint64_t start, end;

+       unsigned int i;

+       void *ptr;

+       int r;

+

+       addr &= AMDGPU_GMC_HOLE_MASK;

+       r = amdgpu_cs_find_mapping(p, addr, &bo, &map);

+       if (r) {

+               DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr);

+               return r;

+       }

+

+       start = map->start * AMDGPU_GPU_PAGE_SIZE;

+       end = (map->last + 1) * AMDGPU_GPU_PAGE_SIZE;

+       if (addr & 0x7) {

+               DRM_ERROR("VCN messages must be 8 byte aligned!\n");

+               return -EINVAL;

+       }

+

+       bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;

+       amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);

+       r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);

+       if (r) {

+               DRM_ERROR("Failed validating the VCN message BO (%d)!\n", r);

+               return r;

+       }

+

+       r = amdgpu_bo_kmap(bo, &ptr);

+       if (r) {

+               DRM_ERROR("Failed mapping the VCN message (%d)!\n", r);

+               return r;

+       }

+

+       msg = ptr + addr - start;

+

+       /* Check length */

+       if (msg[1] > end - addr) {

+               r = -EINVAL;

+               goto out;

+       }

+

+       if (msg[3] != RDECODE_MSG_CREATE)

+               goto out;

+

+       num_buffers = msg[2];

+       for (i = 0, msg = &msg[6]; i < num_buffers; ++i, msg += 4) {

+               uint32_t offset, size, *create;

+

+               if (msg[0] != RDECODE_MESSAGE_CREATE)

+                       continue;

+

+               offset = msg[1];

+               size = msg[2];

+

+               if (offset + size > end) {

+                       r = -EINVAL;

+                       goto out;

+               }

+

+               create = ptr + addr + offset - start;

+

+               /* H264, HEVC and VP9 can run on any instance */

+               if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11)

+                       continue;

+

+               r = vcn_v4_0_limit_sched(p);

+               if (r)

+                       goto out;

+       }

+

+out:

+       amdgpu_bo_kunmap(bo);

+       return r;

+}

+

+#define RADEON_VCN_ENGINE_TYPE_DECODE                                 (0x00000003)

+

+static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,

+                               struct amdgpu_job *job,

+                               struct amdgpu_ib *ib)

+{

+       struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);

+       struct amdgpu_vcn_decode_buffer *decode_buffer = NULL;

+       uint32_t val;

+       int r = 0;

+

+       /* The first instance can decode anything */

+       if (!ring->me)

+               return r;

+

+       /* unified queue ib header has 8 double words. */

+       if (ib->length_dw < 8)

+               return r;

+

+       val = amdgpu_ib_get_value(ib, 6); //RADEON_VCN_ENGINE_TYPE

+

+       if (val == RADEON_VCN_ENGINE_TYPE_DECODE) {

+               decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[10];

+

+               if (decode_buffer->valid_buf_flag  & 0x1)

+                       r = vcn_v4_0_dec_msg(p, ((u64)decode_buffer->msg_buffer_address_hi) << 32 |

+                                               decode_buffer->msg_buffer_address_lo);

+       }

+       return r;

+}

+

 static const struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = {

         .type = AMDGPU_RING_TYPE_VCN_ENC,

         .align_mask = 0x3f,

@@ -1331,6 +1461,7 @@ static const struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = {

         .get_rptr = vcn_v4_0_unified_ring_get_rptr,

         .get_wptr = vcn_v4_0_unified_ring_get_wptr,

         .set_wptr = vcn_v4_0_unified_ring_set_wptr,

+       .patch_cs_in_place = vcn_v4_0_ring_patch_cs_in_place,

         .emit_frame_size =

                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +

                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +

-- 

2.36.1