[PATCH] drm/amdgpu: support more AV1 encoding requests

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Ensure accurate IB package searching and cover
more corner cases for AV1 encoding requests.

Signed-off-by: David (Ming Qiang) Wu <David.Wu3@xxxxxxx>
Reviewed-by: Ruijing Dong <ruijing.dong@xxxxxxx>
---
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 81 +++++++++++++++++++++++++--
 1 file changed, 75 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
index 22a41766a8c7..8235ff3820ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
@@ -1726,6 +1726,7 @@ static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
 
 #define RADEON_VCN_ENGINE_TYPE_ENCODE			(0x00000002)
 #define RADEON_VCN_ENGINE_TYPE_DECODE			(0x00000003)
+#define RADEON_VCN_ENGINE_TYPE_ENCODE_QUEUE		(0x00000004)
 
 #define RADEON_VCN_ENGINE_INFO				(0x30000001)
 #define RADEON_VCN_ENGINE_INFO_MAX_OFFSET		16
@@ -1733,21 +1734,86 @@ static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
 #define RENCODE_ENCODE_STANDARD_AV1			2
 #define RENCODE_IB_PARAM_SESSION_INIT			0x00000003
 #define RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET	64
+#define RENCODE_IB_ENC_QUE_INSTRUCTION			(0x32000001)
+#define RENCODE_IB_ENC_QUE_INSTRUCTION_MAX_OFFSET	64
 
 /* return the offset in ib if id is found, -1 otherwise
  * to speed up the searching we only search upto max_offset
  */
-static int vcn_v4_0_enc_find_ib_param(struct amdgpu_ib *ib, uint32_t id, int max_offset)
+static int vcn_v4_0_enc_find_ib_param(uint32_t *ptr, int size, uint32_t id, int max_offset)
 {
 	int i;
 
-	for (i = 0; i < ib->length_dw && i < max_offset && ib->ptr[i] >= 8; i += ib->ptr[i]/4) {
-		if (ib->ptr[i + 1] == id)
+	for (i = 0; i < size && i < max_offset && ptr[i] >= 8; i += ptr[i] / 4) {
+		if (ptr[i + 1] == id)
 			return i;
 	}
 	return -1;
 }
 
+static int vcn_v4_0_enc_queue_msg(struct amdgpu_cs_parser *p,
+				  struct amdgpu_job *job,
+				  struct amdgpu_ib *ib)
+{
+	struct ttm_operation_ctx ctx = { false, false };
+	struct amdgpu_bo_va_mapping *map;
+	struct amdgpu_bo *bo;
+	uint64_t start, end;
+	int i;
+	void *ptr;
+	int r;
+	int data_size = 0;
+	uint64_t addr;
+	uint32_t *msg;
+
+	i = vcn_v4_0_enc_find_ib_param(ib->ptr, ib->length_dw, RENCODE_IB_ENC_QUE_INSTRUCTION,
+		RENCODE_IB_ENC_QUE_INSTRUCTION_MAX_OFFSET);
+	if (i >= 0) {
+		addr = ((uint64_t)ib->ptr[i + 3]) << 32 | ib->ptr[i + 2];
+		data_size = ib->ptr[i + 4];
+	}
+
+	if (!data_size) /* did not find */
+		return 0;
+
+	addr &= AMDGPU_GMC_HOLE_MASK;
+	r = amdgpu_cs_find_mapping(p, addr, &bo, &map);
+	if (r) {
+		DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr);
+		return r;
+	}
+
+	start = map->start * AMDGPU_GPU_PAGE_SIZE;
+	end = (map->last + 1) * AMDGPU_GPU_PAGE_SIZE;
+	if (addr & 0x7) {
+		DRM_ERROR("VCN messages must be 8 byte aligned!\n");
+		return -EINVAL;
+	}
+
+	bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+	amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
+	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+	if (r) {
+		DRM_ERROR("Failed validating the VCN message BO (%d)!\n", r);
+		return r;
+	}
+
+	r = amdgpu_bo_kmap(bo, &ptr);
+	if (r) {
+		DRM_ERROR("Failed mapping the VCN message (%d)!\n", r);
+		return r;
+	}
+
+	msg = ptr + addr - start; /* IB with SESSION_INIT */
+	i = vcn_v4_0_enc_find_ib_param(msg, data_size, RENCODE_IB_PARAM_SESSION_INIT,
+		RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET);
+	if (i >= 0 && msg[i + 2] == RENCODE_ENCODE_STANDARD_AV1)
+		r = vcn_v4_0_limit_sched(p, job);
+
+	amdgpu_bo_kunmap(bo);
+	return r;
+}
+
 static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
 					   struct amdgpu_job *job,
 					   struct amdgpu_ib *ib)
@@ -1763,12 +1829,13 @@ static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
 		return 0;
 
 	/* RADEON_VCN_ENGINE_INFO is at the top of ib block */
-	idx = vcn_v4_0_enc_find_ib_param(ib, RADEON_VCN_ENGINE_INFO,
+	idx = vcn_v4_0_enc_find_ib_param(ib->ptr, ib->length_dw, RADEON_VCN_ENGINE_INFO,
 			RADEON_VCN_ENGINE_INFO_MAX_OFFSET);
 	if (idx < 0) /* engine info is missing */
 		return 0;
 
 	val = amdgpu_ib_get_value(ib, idx + 2); /* RADEON_VCN_ENGINE_TYPE */
+
 	if (val == RADEON_VCN_ENGINE_TYPE_DECODE) {
 		decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[idx + 6];
 
@@ -1779,10 +1846,12 @@ static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
 			decode_buffer->msg_buffer_address_lo;
 		return vcn_v4_0_dec_msg(p, job, addr);
 	} else if (val == RADEON_VCN_ENGINE_TYPE_ENCODE) {
-		idx = vcn_v4_0_enc_find_ib_param(ib, RENCODE_IB_PARAM_SESSION_INIT,
-			RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET);
+		idx = vcn_v4_0_enc_find_ib_param(ib->ptr, ib->length_dw,
+			RENCODE_IB_PARAM_SESSION_INIT, RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET);
 		if (idx >= 0 && ib->ptr[idx + 2] == RENCODE_ENCODE_STANDARD_AV1)
 			return vcn_v4_0_limit_sched(p, job);
+	} else if (val == RADEON_VCN_ENGINE_TYPE_ENCODE_QUEUE) {
+		return vcn_v4_0_enc_queue_msg(p, job, ib);
 	}
 	return 0;
 }
-- 
2.34.1




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux