Ensure accurate IB package searching and cover more corner cases for AV1 encoding requests. Signed-off-by: David (Ming Qiang) Wu <David.Wu3@xxxxxxx> Reviewed-by: Ruijing Dong <ruijing.dong@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 81 +++++++++++++++++++++++++-- 1 file changed, 75 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 22a41766a8c7..8235ff3820ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -1726,6 +1726,7 @@ static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, #define RADEON_VCN_ENGINE_TYPE_ENCODE (0x00000002) #define RADEON_VCN_ENGINE_TYPE_DECODE (0x00000003) +#define RADEON_VCN_ENGINE_TYPE_ENCODE_QUEUE (0x00000004) #define RADEON_VCN_ENGINE_INFO (0x30000001) #define RADEON_VCN_ENGINE_INFO_MAX_OFFSET 16 @@ -1733,21 +1734,86 @@ static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, #define RENCODE_ENCODE_STANDARD_AV1 2 #define RENCODE_IB_PARAM_SESSION_INIT 0x00000003 #define RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET 64 +#define RENCODE_IB_ENC_QUE_INSTRUCTION (0x32000001) +#define RENCODE_IB_ENC_QUE_INSTRUCTION_MAX_OFFSET 64 /* return the offset in ib if id is found, -1 otherwise * to speed up the searching we only search upto max_offset */ -static int vcn_v4_0_enc_find_ib_param(struct amdgpu_ib *ib, uint32_t id, int max_offset) +static int vcn_v4_0_enc_find_ib_param(uint32_t *ptr, int size, uint32_t id, int max_offset) { int i; - for (i = 0; i < ib->length_dw && i < max_offset && ib->ptr[i] >= 8; i += ib->ptr[i]/4) { - if (ib->ptr[i + 1] == id) + for (i = 0; i < size && i < max_offset && ptr[i] >= 8; i += ptr[i] / 4) { + if (ptr[i + 1] == id) return i; } return -1; } +static int vcn_v4_0_enc_queue_msg(struct amdgpu_cs_parser *p, + struct amdgpu_job *job, + struct amdgpu_ib *ib) +{ + struct ttm_operation_ctx ctx = { false, false }; + struct amdgpu_bo_va_mapping *map; + 
struct amdgpu_bo *bo; + uint64_t start, end; + int i; + void *ptr; + int r; + int data_size = 0; + uint64_t addr; + uint32_t *msg; + + i = vcn_v4_0_enc_find_ib_param(ib->ptr, ib->length_dw, RENCODE_IB_ENC_QUE_INSTRUCTION, + RENCODE_IB_ENC_QUE_INSTRUCTION_MAX_OFFSET); + if (i >= 0) { + addr = ((uint64_t)ib->ptr[i + 3]) << 32 | ib->ptr[i + 2]; + data_size = ib->ptr[i + 4]; + } + + if (!data_size) /* did not find */ + return 0; + + addr &= AMDGPU_GMC_HOLE_MASK; + r = amdgpu_cs_find_mapping(p, addr, &bo, &map); + if (r) { + DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr); + return r; + } + + start = map->start * AMDGPU_GPU_PAGE_SIZE; + end = (map->last + 1) * AMDGPU_GPU_PAGE_SIZE; + if (addr & 0x7) { + DRM_ERROR("VCN messages must be 8 byte aligned!\n"); + return -EINVAL; + } + + bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + amdgpu_bo_placement_from_domain(bo, bo->allowed_domains); + r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (r) { + DRM_ERROR("Failed validating the VCN message BO (%d)!\n", r); + return r; + } + + r = amdgpu_bo_kmap(bo, &ptr); + if (r) { + DRM_ERROR("Failed mapping the VCN message (%d)!\n", r); + return r; + } + + msg = ptr + addr - start; /* IB with SESSION_INIT */ + i = vcn_v4_0_enc_find_ib_param(msg, data_size, RENCODE_IB_PARAM_SESSION_INIT, + RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET); + if (i >= 0 && msg[i + 2] == RENCODE_ENCODE_STANDARD_AV1) + r = vcn_v4_0_limit_sched(p, job); + + amdgpu_bo_kunmap(bo); + return r; +} + static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, struct amdgpu_job *job, struct amdgpu_ib *ib) @@ -1763,12 +1829,13 @@ static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, return 0; /* RADEON_VCN_ENGINE_INFO is at the top of ib block */ - idx = vcn_v4_0_enc_find_ib_param(ib, RADEON_VCN_ENGINE_INFO, + idx = vcn_v4_0_enc_find_ib_param(ib->ptr, ib->length_dw, RADEON_VCN_ENGINE_INFO, RADEON_VCN_ENGINE_INFO_MAX_OFFSET); if (idx < 0) /* engine info is missing */ return 0; 
val = amdgpu_ib_get_value(ib, idx + 2); /* RADEON_VCN_ENGINE_TYPE */ + if (val == RADEON_VCN_ENGINE_TYPE_DECODE) { decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[idx + 6]; @@ -1779,10 +1846,12 @@ static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, decode_buffer->msg_buffer_address_lo; return vcn_v4_0_dec_msg(p, job, addr); } else if (val == RADEON_VCN_ENGINE_TYPE_ENCODE) { - idx = vcn_v4_0_enc_find_ib_param(ib, RENCODE_IB_PARAM_SESSION_INIT, - RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET); + idx = vcn_v4_0_enc_find_ib_param(ib->ptr, ib->length_dw, + RENCODE_IB_PARAM_SESSION_INIT, RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET); if (idx >= 0 && ib->ptr[idx + 2] == RENCODE_ENCODE_STANDARD_AV1) return vcn_v4_0_limit_sched(p, job); + } else if (val == RADEON_VCN_ENGINE_TYPE_ENCODE_QUEUE) { + return vcn_v4_0_enc_queue_msg(p, job, ib); } return 0; } -- 2.34.1