Re: [PATCH] drm/amdgpu: support more AV1 encoding requests

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Am 23.02.23 um 00:11 schrieb David (Ming Qiang) Wu:
Ensure accurate IB package searching and cover
more corner cases for AV1 encoding requests.

That at least looks much cleaner now. Do we already have the Mesa patches ready which use this?

Regards,
Christian.


Signed-off-by: David (Ming Qiang) Wu <David.Wu3@xxxxxxx>
Reviewed-by: Ruijing Dong <ruijing.dong@xxxxxxx>
---
  drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 81 +++++++++++++++++++++++++--
  1 file changed, 75 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
index 22a41766a8c7..8235ff3820ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
@@ -1726,6 +1726,7 @@ static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
#define RADEON_VCN_ENGINE_TYPE_ENCODE (0x00000002)
  #define RADEON_VCN_ENGINE_TYPE_DECODE			(0x00000003)
+#define RADEON_VCN_ENGINE_TYPE_ENCODE_QUEUE		(0x00000004)
#define RADEON_VCN_ENGINE_INFO (0x30000001)
  #define RADEON_VCN_ENGINE_INFO_MAX_OFFSET		16
@@ -1733,21 +1734,86 @@ static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
  #define RENCODE_ENCODE_STANDARD_AV1			2
  #define RENCODE_IB_PARAM_SESSION_INIT			0x00000003
  #define RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET	64
+#define RENCODE_IB_ENC_QUE_INSTRUCTION			(0x32000001)
+#define RENCODE_IB_ENC_QUE_INSTRUCTION_MAX_OFFSET	64
/* return the offset in ib if id is found, -1 otherwise
   * to speed up the searching we only search upto max_offset
   */
-static int vcn_v4_0_enc_find_ib_param(struct amdgpu_ib *ib, uint32_t id, int max_offset)
+static int vcn_v4_0_enc_find_ib_param(uint32_t *ptr, int size, uint32_t id, int max_offset)
  {
  	int i;
- for (i = 0; i < ib->length_dw && i < max_offset && ib->ptr[i] >= 8; i += ib->ptr[i]/4) {
-		if (ib->ptr[i + 1] == id)
+	for (i = 0; i < size && i < max_offset && ptr[i] >= 8; i += ptr[i] / 4) {
+		if (ptr[i + 1] == id)
  			return i;
  	}
  	return -1;
  }
+static int vcn_v4_0_enc_queue_msg(struct amdgpu_cs_parser *p,
+				  struct amdgpu_job *job,
+				  struct amdgpu_ib *ib)
+{
+	struct ttm_operation_ctx ctx = { false, false };
+	struct amdgpu_bo_va_mapping *map;
+	struct amdgpu_bo *bo;
+	uint64_t start, end;
+	int i;
+	void *ptr;
+	int r;
+	int data_size = 0;
+	uint64_t addr;
+	uint32_t *msg;
+
+	i = vcn_v4_0_enc_find_ib_param(ib->ptr, ib->length_dw, RENCODE_IB_ENC_QUE_INSTRUCTION,
+		RENCODE_IB_ENC_QUE_INSTRUCTION_MAX_OFFSET);
+	if (i >= 0) {
+		addr = ((uint64_t)ib->ptr[i + 3]) << 32 | ib->ptr[i + 2];
+		data_size = ib->ptr[i + 4];
+	}
+
+	if (!data_size) /* did not find */
+		return 0;
+
+	addr &= AMDGPU_GMC_HOLE_MASK;
+	r = amdgpu_cs_find_mapping(p, addr, &bo, &map);
+	if (r) {
+		DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr);
+		return r;
+	}
+
+	start = map->start * AMDGPU_GPU_PAGE_SIZE;
+	end = (map->last + 1) * AMDGPU_GPU_PAGE_SIZE;
+	if (addr & 0x7) {
+		DRM_ERROR("VCN messages must be 8 byte aligned!\n");
+		return -EINVAL;
+	}
+
+	bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+	amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
+	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+	if (r) {
+		DRM_ERROR("Failed validating the VCN message BO (%d)!\n", r);
+		return r;
+	}
+
+	r = amdgpu_bo_kmap(bo, &ptr);
+	if (r) {
+		DRM_ERROR("Failed mapping the VCN message (%d)!\n", r);
+		return r;
+	}
+
+	msg = ptr + addr - start; /* IB with SESSION_INIT */
+	i = vcn_v4_0_enc_find_ib_param(msg, data_size, RENCODE_IB_PARAM_SESSION_INIT,
+		RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET);
+	if (i >= 0 && msg[i + 2] == RENCODE_ENCODE_STANDARD_AV1)
+		r = vcn_v4_0_limit_sched(p, job);
+
+	amdgpu_bo_kunmap(bo);
+	return r;
+}
+
  static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
  					   struct amdgpu_job *job,
  					   struct amdgpu_ib *ib)
@@ -1763,12 +1829,13 @@ static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
  		return 0;
/* RADEON_VCN_ENGINE_INFO is at the top of ib block */
-	idx = vcn_v4_0_enc_find_ib_param(ib, RADEON_VCN_ENGINE_INFO,
+	idx = vcn_v4_0_enc_find_ib_param(ib->ptr, ib->length_dw, RADEON_VCN_ENGINE_INFO,
  			RADEON_VCN_ENGINE_INFO_MAX_OFFSET);
  	if (idx < 0) /* engine info is missing */
  		return 0;
val = amdgpu_ib_get_value(ib, idx + 2); /* RADEON_VCN_ENGINE_TYPE */
+
  	if (val == RADEON_VCN_ENGINE_TYPE_DECODE) {
  		decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[idx + 6];
@@ -1779,10 +1846,12 @@ static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
  			decode_buffer->msg_buffer_address_lo;
  		return vcn_v4_0_dec_msg(p, job, addr);
  	} else if (val == RADEON_VCN_ENGINE_TYPE_ENCODE) {
-		idx = vcn_v4_0_enc_find_ib_param(ib, RENCODE_IB_PARAM_SESSION_INIT,
-			RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET);
+		idx = vcn_v4_0_enc_find_ib_param(ib->ptr, ib->length_dw,
+			RENCODE_IB_PARAM_SESSION_INIT, RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET);
  		if (idx >= 0 && ib->ptr[idx + 2] == RENCODE_ENCODE_STANDARD_AV1)
  			return vcn_v4_0_limit_sched(p, job);
+	} else if (val == RADEON_VCN_ENGINE_TYPE_ENCODE_QUEUE) {
+		return vcn_v4_0_enc_queue_msg(p, job, ib);
  	}
  	return 0;
  }




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux