From: Leo Liu <leo.liu@xxxxxxx> Swizzle mode requires the luma and chroma pitch of both the reference and input pictures to be aligned to 256. Signed-off-by: Leo Liu <leo.liu at amd.com> Acked-by: Alex Deucher <alexander.deucher at amd.com> --- tests/amdgpu/vce_tests.c | 54 +++++++++++++++++++++++++++++++++++++----------- 1 file changed, 42 insertions(+), 12 deletions(-) diff --git a/tests/amdgpu/vce_tests.c b/tests/amdgpu/vce_tests.c index de63aa1..b03807b 100644 --- a/tests/amdgpu/vce_tests.c +++ b/tests/amdgpu/vce_tests.c @@ -227,36 +227,39 @@ static void free_resource(struct amdgpu_vce_bo *vce_bo) r = amdgpu_va_range_free(vce_bo->va_handle); CU_ASSERT_EQUAL(r, 0); r = amdgpu_bo_free(vce_bo->handle); CU_ASSERT_EQUAL(r, 0); memset(vce_bo, 0, sizeof(*vce_bo)); } static void amdgpu_cs_vce_create(void) { + unsigned align = (family_id >= AMDGPU_FAMILY_AI) ? 256 : 16; int len, r; enc.width = vce_create[6]; enc.height = vce_create[7]; num_resources = 0; alloc_resource(&enc.fb[0], 4096, AMDGPU_GEM_DOMAIN_GTT); resources[num_resources++] = enc.fb[0].handle; resources[num_resources++] = ib_handle; len = 0; memcpy(ib_cpu, vce_session, sizeof(vce_session)); len += sizeof(vce_session) / 4; memcpy((ib_cpu + len), vce_taskinfo, sizeof(vce_taskinfo)); len += sizeof(vce_taskinfo) / 4; memcpy((ib_cpu + len), vce_create, sizeof(vce_create)); + ib_cpu[len + 8] = ALIGN(enc.width, align); + ib_cpu[len + 9] = ALIGN(enc.width, align); len += sizeof(vce_create) / 4; memcpy((ib_cpu + len), vce_feedback, sizeof(vce_feedback)); ib_cpu[len + 2] = enc.fb[0].addr >> 32; ib_cpu[len + 3] = enc.fb[0].addr; len += sizeof(vce_feedback) / 4; r = submit(len, AMDGPU_HW_IP_VCE); CU_ASSERT_EQUAL(r, 0); free_resource(&enc.fb[0]); @@ -284,102 +287,118 @@ static void amdgpu_cs_vce_config(void) len += sizeof(vce_pic_ctrl) / 4; r = submit(len, AMDGPU_HW_IP_VCE); CU_ASSERT_EQUAL(r, 0); } static void amdgpu_cs_vce_encode_idr(struct amdgpu_vce_encode *enc) { uint64_t luma_offset, chroma_offset; - int len = 0, r; + unsigned align = 
(family_id >= AMDGPU_FAMILY_AI) ? 256 : 16; + unsigned luma_size = ALIGN(enc->width, align) * ALIGN(enc->height, 16); + int len = 0, i, r; luma_offset = enc->vbuf.addr; - chroma_offset = luma_offset + enc->width * enc->height; + chroma_offset = luma_offset + luma_size; memcpy((ib_cpu + len), vce_session, sizeof(vce_session)); len += sizeof(vce_session) / 4; memcpy((ib_cpu + len), vce_taskinfo, sizeof(vce_taskinfo)); len += sizeof(vce_taskinfo) / 4; memcpy((ib_cpu + len), vce_bs_buffer, sizeof(vce_bs_buffer)); ib_cpu[len + 2] = enc->bs[0].addr >> 32; ib_cpu[len + 3] = enc->bs[0].addr; len += sizeof(vce_bs_buffer) / 4; memcpy((ib_cpu + len), vce_context_buffer, sizeof(vce_context_buffer)); ib_cpu[len + 2] = enc->cpb.addr >> 32; ib_cpu[len + 3] = enc->cpb.addr; len += sizeof(vce_context_buffer) / 4; memcpy((ib_cpu + len), vce_aux_buffer, sizeof(vce_aux_buffer)); + for (i = 0; i < 8; ++i) + ib_cpu[len + 2 + i] = luma_size * 1.5 * (i + 2); + for (i = 0; i < 8; ++i) + ib_cpu[len + 10 + i] = luma_size * 1.5; len += sizeof(vce_aux_buffer) / 4; memcpy((ib_cpu + len), vce_feedback, sizeof(vce_feedback)); ib_cpu[len + 2] = enc->fb[0].addr >> 32; ib_cpu[len + 3] = enc->fb[0].addr; len += sizeof(vce_feedback) / 4; memcpy((ib_cpu + len), vce_encode, sizeof(vce_encode)); ib_cpu[len + 9] = luma_offset >> 32; ib_cpu[len + 10] = luma_offset; ib_cpu[len + 11] = chroma_offset >> 32; ib_cpu[len + 12] = chroma_offset; - ib_cpu[len + 73] = 0x7800; - ib_cpu[len + 74] = 0x7800 + 0x5000; + ib_cpu[len + 14] = ALIGN(enc->width, align); + ib_cpu[len + 15] = ALIGN(enc->width, align); + ib_cpu[len + 73] = luma_size * 1.5; + ib_cpu[len + 74] = luma_size * 2.5; len += sizeof(vce_encode) / 4; enc->ib_len = len; if (!enc->two_instance) { r = submit(len, AMDGPU_HW_IP_VCE); CU_ASSERT_EQUAL(r, 0); } } static void amdgpu_cs_vce_encode_p(struct amdgpu_vce_encode *enc) { uint64_t luma_offset, chroma_offset; - int len, r; + int len, i, r; + unsigned align = (family_id >= AMDGPU_FAMILY_AI) ? 
256 : 16; + unsigned luma_size = ALIGN(enc->width, align) * ALIGN(enc->height, 16); len = (enc->two_instance) ? enc->ib_len : 0; luma_offset = enc->vbuf.addr; - chroma_offset = luma_offset + enc->width * enc->height; + chroma_offset = luma_offset + luma_size; if (!enc->two_instance) { memcpy((ib_cpu + len), vce_session, sizeof(vce_session)); len += sizeof(vce_session) / 4; } memcpy((ib_cpu + len), vce_taskinfo, sizeof(vce_taskinfo)); len += sizeof(vce_taskinfo) / 4; memcpy((ib_cpu + len), vce_bs_buffer, sizeof(vce_bs_buffer)); ib_cpu[len + 2] = enc->bs[1].addr >> 32; ib_cpu[len + 3] = enc->bs[1].addr; len += sizeof(vce_bs_buffer) / 4; memcpy((ib_cpu + len), vce_context_buffer, sizeof(vce_context_buffer)); ib_cpu[len + 2] = enc->cpb.addr >> 32; ib_cpu[len + 3] = enc->cpb.addr; len += sizeof(vce_context_buffer) / 4; memcpy((ib_cpu + len), vce_aux_buffer, sizeof(vce_aux_buffer)); + for (i = 0; i < 8; ++i) + ib_cpu[len + 2 + i] = luma_size * 1.5 * (i + 2); + for (i = 0; i < 8; ++i) + ib_cpu[len + 10 + i] = luma_size * 1.5; len += sizeof(vce_aux_buffer) / 4; memcpy((ib_cpu + len), vce_feedback, sizeof(vce_feedback)); ib_cpu[len + 2] = enc->fb[1].addr >> 32; ib_cpu[len + 3] = enc->fb[1].addr; len += sizeof(vce_feedback) / 4; memcpy((ib_cpu + len), vce_encode, sizeof(vce_encode)); ib_cpu[len + 2] = 0; ib_cpu[len + 9] = luma_offset >> 32; ib_cpu[len + 10] = luma_offset; ib_cpu[len + 11] = chroma_offset >> 32; ib_cpu[len + 12] = chroma_offset; + ib_cpu[len + 14] = ALIGN(enc->width, align); + ib_cpu[len + 15] = ALIGN(enc->width, align); ib_cpu[len + 18] = 0; ib_cpu[len + 19] = 0; ib_cpu[len + 56] = 3; ib_cpu[len + 57] = 0; ib_cpu[len + 58] = 0; - ib_cpu[len + 59] = 0x7800; - ib_cpu[len + 60] = 0x7800 + 0x5000; + ib_cpu[len + 59] = luma_size * 1.5; + ib_cpu[len + 60] = luma_size * 2.5; ib_cpu[len + 73] = 0; - ib_cpu[len + 74] = 0x5000; + ib_cpu[len + 74] = luma_size; ib_cpu[len + 81] = 1; ib_cpu[len + 82] = 1; len += sizeof(vce_encode) / 4; r = submit(len, AMDGPU_HW_IP_VCE); 
CU_ASSERT_EQUAL(r, 0); } static void check_result(struct amdgpu_vce_encode *enc) { @@ -401,42 +420,53 @@ static void check_result(struct amdgpu_vce_encode *enc) sum += enc->bs[i].ptr[j]; CU_ASSERT_EQUAL(sum, s[i]); r = amdgpu_bo_cpu_unmap(enc->bs[i].handle); CU_ASSERT_EQUAL(r, 0); } } static void amdgpu_cs_vce_encode(void) { uint32_t vbuf_size, bs_size = 0x154000, cpb_size; - int r; + unsigned align = (family_id >= AMDGPU_FAMILY_AI) ? 256 : 16; + int i, r; - vbuf_size = enc.width * enc.height * 1.5; + vbuf_size = ALIGN(enc.width, align) * ALIGN(enc.height, 16) * 1.5; cpb_size = vbuf_size * 10; num_resources = 0; alloc_resource(&enc.fb[0], 4096, AMDGPU_GEM_DOMAIN_GTT); resources[num_resources++] = enc.fb[0].handle; alloc_resource(&enc.fb[1], 4096, AMDGPU_GEM_DOMAIN_GTT); resources[num_resources++] = enc.fb[1].handle; alloc_resource(&enc.bs[0], bs_size, AMDGPU_GEM_DOMAIN_GTT); resources[num_resources++] = enc.bs[0].handle; alloc_resource(&enc.bs[1], bs_size, AMDGPU_GEM_DOMAIN_GTT); resources[num_resources++] = enc.bs[1].handle; alloc_resource(&enc.vbuf, vbuf_size, AMDGPU_GEM_DOMAIN_VRAM); resources[num_resources++] = enc.vbuf.handle; alloc_resource(&enc.cpb, cpb_size, AMDGPU_GEM_DOMAIN_VRAM); resources[num_resources++] = enc.cpb.handle; resources[num_resources++] = ib_handle; r = amdgpu_bo_cpu_map(enc.vbuf.handle, (void **)&enc.vbuf.ptr); CU_ASSERT_EQUAL(r, 0); - memcpy(enc.vbuf.ptr, frame, sizeof(frame)); + + memset(enc.vbuf.ptr, 0, vbuf_size); + for (i = 0; i < enc.height; ++i) { + memcpy(enc.vbuf.ptr, (frame + i * enc.width), enc.width); + enc.vbuf.ptr += ALIGN(enc.width, align); + } + for (i = 0; i < enc.height / 2; ++i) { + memcpy(enc.vbuf.ptr, ((frame + enc.height * enc.width) + i * enc.width), enc.width); + enc.vbuf.ptr += ALIGN(enc.width, align); + } + r = amdgpu_bo_cpu_unmap(enc.vbuf.handle); CU_ASSERT_EQUAL(r, 0); amdgpu_cs_vce_config(); if (family_id >= AMDGPU_FAMILY_VI) { vce_taskinfo[3] = 3; amdgpu_cs_vce_encode_idr(&enc); amdgpu_cs_vce_encode_p(&enc); 
check_result(&enc); -- 2.7.4