[AMD Official Use Only - General] Thanks Christian, This is just to cover the possible valid ways in the kernel as preparation; AV1 encoding in Mesa is still under development. Thanks, Ruijing -----Original Message----- From: Christian König <ckoenig.leichtzumerken@xxxxxxxxx> Sent: Thursday, February 23, 2023 1:48 AM To: Wu, David <David.Wu3@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx; Koenig, Christian <Christian.Koenig@xxxxxxx> Cc: Deucher, Alexander <Alexander.Deucher@xxxxxxx>; Dong, Ruijing <Ruijing.Dong@xxxxxxx>; Liu, Leo <Leo.Liu@xxxxxxx> Subject: Re: [PATCH] drm/amdgpu: support more AV1 encoding requests Am 23.02.23 um 00:11 schrieb David (Ming Qiang) Wu: > Ensuring accurate IB package searching and covers more corners for AV1 > encoding requests. That at least looks much cleaner now. Do we already have the Mesa patches ready which use this? Regards, Christian. > > Signed-off-by: David (Ming Qiang) Wu <David.Wu3@xxxxxxx> > Reviewed-by: Ruijing Dong <ruijing.dong@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 81 +++++++++++++++++++++++++-- > 1 file changed, 75 insertions(+), 6 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c > b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c > index 22a41766a8c7..8235ff3820ed 100644 > --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c > @@ -1726,6 +1726,7 @@ static int vcn_v4_0_dec_msg(struct > amdgpu_cs_parser *p, struct amdgpu_job *job, > > #define RADEON_VCN_ENGINE_TYPE_ENCODE (0x00000002) > #define RADEON_VCN_ENGINE_TYPE_DECODE (0x00000003) > +#define RADEON_VCN_ENGINE_TYPE_ENCODE_QUEUE (0x00000004) > > #define RADEON_VCN_ENGINE_INFO (0x30000001) > #define RADEON_VCN_ENGINE_INFO_MAX_OFFSET 16 > @@ -1733,21 +1734,86 @@ static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, > #define RENCODE_ENCODE_STANDARD_AV1 2 > #define RENCODE_IB_PARAM_SESSION_INIT 0x00000003 > #define RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET 64 > +#define 
RENCODE_IB_ENC_QUE_INSTRUCTION (0x32000001) > +#define RENCODE_IB_ENC_QUE_INSTRUCTION_MAX_OFFSET 64 > > /* return the offset in ib if id is found, -1 otherwise > * to speed up the searching we only search upto max_offset > */ > -static int vcn_v4_0_enc_find_ib_param(struct amdgpu_ib *ib, uint32_t > id, int max_offset) > +static int vcn_v4_0_enc_find_ib_param(uint32_t *ptr, int size, > +uint32_t id, int max_offset) > { > int i; > > - for (i = 0; i < ib->length_dw && i < max_offset && ib->ptr[i] >= 8; i += ib->ptr[i]/4) { > - if (ib->ptr[i + 1] == id) > + for (i = 0; i < size && i < max_offset && ptr[i] >= 8; i += ptr[i] / 4) { > + if (ptr[i + 1] == id) > return i; > } > return -1; > } > > +static int vcn_v4_0_enc_queue_msg(struct amdgpu_cs_parser *p, > + struct amdgpu_job *job, > + struct amdgpu_ib *ib) > +{ > + struct ttm_operation_ctx ctx = { false, false }; > + struct amdgpu_bo_va_mapping *map; > + struct amdgpu_bo *bo; > + uint64_t start, end; > + int i; > + void *ptr; > + int r; > + int data_size = 0; > + uint64_t addr; > + uint32_t *msg; > + > + i = vcn_v4_0_enc_find_ib_param(ib->ptr, ib->length_dw, RENCODE_IB_ENC_QUE_INSTRUCTION, > + RENCODE_IB_ENC_QUE_INSTRUCTION_MAX_OFFSET); > + if (i >= 0) { > + addr = ((uint64_t)ib->ptr[i + 3]) << 32 | ib->ptr[i + 2]; > + data_size = ib->ptr[i + 4]; > + } > + > + if (!data_size) /* did not find */ > + return 0; > + > + addr &= AMDGPU_GMC_HOLE_MASK; > + r = amdgpu_cs_find_mapping(p, addr, &bo, &map); > + if (r) { > + DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr); > + return r; > + } > + > + start = map->start * AMDGPU_GPU_PAGE_SIZE; > + end = (map->last + 1) * AMDGPU_GPU_PAGE_SIZE; > + if (addr & 0x7) { > + DRM_ERROR("VCN messages must be 8 byte aligned!\n"); > + return -EINVAL; > + } > + > + bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; > + amdgpu_bo_placement_from_domain(bo, bo->allowed_domains); > + r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); > + if (r) { > + DRM_ERROR("Failed validating the VCN 
message BO (%d)!\n", r); > + return r; > + } > + > + r = amdgpu_bo_kmap(bo, &ptr); > + if (r) { > + DRM_ERROR("Failed mapping the VCN message (%d)!\n", r); > + return r; > + } > + > + msg = ptr + addr - start; /* IB with SESSION_INIT */ > + i = vcn_v4_0_enc_find_ib_param(msg, data_size, RENCODE_IB_PARAM_SESSION_INIT, > + RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET); > + if (i >= 0 && msg[i + 2] == RENCODE_ENCODE_STANDARD_AV1) > + r = vcn_v4_0_limit_sched(p, job); > + > + amdgpu_bo_kunmap(bo); > + return r; > +} > + > static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, > struct amdgpu_job *job, > struct amdgpu_ib *ib) > @@ -1763,12 +1829,13 @@ static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, > return 0; > > /* RADEON_VCN_ENGINE_INFO is at the top of ib block */ > - idx = vcn_v4_0_enc_find_ib_param(ib, RADEON_VCN_ENGINE_INFO, > + idx = vcn_v4_0_enc_find_ib_param(ib->ptr, ib->length_dw, > +RADEON_VCN_ENGINE_INFO, > RADEON_VCN_ENGINE_INFO_MAX_OFFSET); > if (idx < 0) /* engine info is missing */ > return 0; > > val = amdgpu_ib_get_value(ib, idx + 2); /* RADEON_VCN_ENGINE_TYPE > */ > + > if (val == RADEON_VCN_ENGINE_TYPE_DECODE) { > decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[idx + > 6]; > > @@ -1779,10 +1846,12 @@ static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, > decode_buffer->msg_buffer_address_lo; > return vcn_v4_0_dec_msg(p, job, addr); > } else if (val == RADEON_VCN_ENGINE_TYPE_ENCODE) { > - idx = vcn_v4_0_enc_find_ib_param(ib, RENCODE_IB_PARAM_SESSION_INIT, > - RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET); > + idx = vcn_v4_0_enc_find_ib_param(ib->ptr, ib->length_dw, > + RENCODE_IB_PARAM_SESSION_INIT, > +RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET); > if (idx >= 0 && ib->ptr[idx + 2] == RENCODE_ENCODE_STANDARD_AV1) > return vcn_v4_0_limit_sched(p, job); > + } else if (val == RADEON_VCN_ENGINE_TYPE_ENCODE_QUEUE) { > + return vcn_v4_0_enc_queue_msg(p, job, ib); > } > return 0; > }