Am 28.11.2017 um 16:32 schrieb Andrey Grodzovsky: > The test is as follows: > > 1) Create contexts A & B > 2) Send a command submission using context A which fires up a compute shader. > 3) The shader waits a bit and then writes a value to a memory location. > 4) Send a command submission using context B which writes another value to the same memory location, but has an explicit dependency on the first command submission. > 5) Wait with the CPU for both submissions to finish and inspect the written value. > > Test passes if the value seen in the memory location after both submissions is from command B. > > Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com> Would probably be nice to add this to the deadlock.c tests instead of the basic_tests.c, but I think that won't work because of the PM4 defines needed. Anyway patch is Acked-by: Christian König <christian.koenig at amd.com> Regards, Christian. > --- > tests/amdgpu/amdgpu_test.c | 18 ++++ > tests/amdgpu/basic_tests.c | 264 +++++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 282 insertions(+) > > diff --git a/tests/amdgpu/amdgpu_test.c b/tests/amdgpu/amdgpu_test.c > index 50da17c..8fa3399 100644 > --- a/tests/amdgpu/amdgpu_test.c > +++ b/tests/amdgpu/amdgpu_test.c > @@ -49,6 +49,7 @@ > #include "CUnit/Basic.h" > > #include "amdgpu_test.h" > +#include "amdgpu_internal.h" > > /* Test suit names */ > #define BASIC_TESTS_STR "Basic Tests" > @@ -401,9 +402,20 @@ static int amdgpu_find_device(uint8_t bus, uint16_t dev) > > static void amdgpu_disable_suits() > { > + amdgpu_device_handle device_handle; > + uint32_t major_version, minor_version, family_id; > int i; > int size = sizeof(suites_active_stat) / sizeof(suites_active_stat[0]); > > + if (amdgpu_device_initialize(drm_amdgpu[0], &major_version, > + &minor_version, &device_handle)) > + return; > + > + family_id = device_handle->info.family_id; > + > + if (amdgpu_device_deinitialize(device_handle)) > + return; > + > /* Set active status for 
suits based on their policies */ > for (i = 0; i < size; ++i) > if (amdgpu_set_suite_active(suites_active_stat[i].pName, > @@ -420,6 +432,12 @@ static void amdgpu_disable_suits() > > if (amdgpu_set_test_active(BO_TESTS_STR, "Metadata", CU_FALSE)) > fprintf(stderr, "test deactivation failed - %s\n", CU_get_error_msg()); > + > + > + /* This test was ran on GFX8 and GFX9 only */ > + if (family_id < AMDGPU_FAMILY_VI || family_id > AMDGPU_FAMILY_RV) > + if (amdgpu_set_test_active(BASIC_TESTS_STR, "Sync dependency Test", CU_FALSE)) > + fprintf(stderr, "test deactivation failed - %s\n", CU_get_error_msg()); > } > > /* The main() function for setting up and running the tests. > diff --git a/tests/amdgpu/basic_tests.c b/tests/amdgpu/basic_tests.c > index e7f48e3..a78cf52 100644 > --- a/tests/amdgpu/basic_tests.c > +++ b/tests/amdgpu/basic_tests.c > @@ -50,6 +50,7 @@ static void amdgpu_command_submission_multi_fence(void); > static void amdgpu_command_submission_sdma(void); > static void amdgpu_userptr_test(void); > static void amdgpu_semaphore_test(void); > +static void amdgpu_sync_dependency_test(void); > > static void amdgpu_command_submission_write_linear_helper(unsigned ip_type); > static void amdgpu_command_submission_const_fill_helper(unsigned ip_type); > @@ -63,6 +64,7 @@ CU_TestInfo basic_tests[] = { > { "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence }, > { "Command submission Test (SDMA)", amdgpu_command_submission_sdma }, > { "SW semaphore Test", amdgpu_semaphore_test }, > + { "Sync dependency Test", amdgpu_sync_dependency_test }, > CU_TEST_INFO_NULL, > }; > #define BUFFER_SIZE (8 * 1024) > @@ -226,6 +228,60 @@ CU_TestInfo basic_tests[] = { > */ > # define PACKET3_DMA_DATA_SI_CP_SYNC (1 << 31) > > + > +#define PKT3_CONTEXT_CONTROL 0x28 > +#define CONTEXT_CONTROL_LOAD_ENABLE(x) (((unsigned)(x) & 0x1) << 31) > +#define CONTEXT_CONTROL_LOAD_CE_RAM(x) (((unsigned)(x) & 0x1) << 28) > +#define CONTEXT_CONTROL_SHADOW_ENABLE(x) 
(((unsigned)(x) & 0x1) << 31) > + > +#define PKT3_CLEAR_STATE 0x12 > + > +#define PKT3_SET_SH_REG 0x76 > +#define PACKET3_SET_SH_REG_START 0x00002c00 > + > +#define PACKET3_DISPATCH_DIRECT 0x15 > + > + > +/* gfx 8 */ > +#define mmCOMPUTE_PGM_LO 0x2e0c > +#define mmCOMPUTE_PGM_RSRC1 0x2e12 > +#define mmCOMPUTE_TMPRING_SIZE 0x2e18 > +#define mmCOMPUTE_USER_DATA_0 0x2e40 > +#define mmCOMPUTE_USER_DATA_1 0x2e41 > +#define mmCOMPUTE_RESOURCE_LIMITS 0x2e15 > +#define mmCOMPUTE_NUM_THREAD_X 0x2e07 > + > + > + > +#define SWAP_32(num) ((num>>24)&0xff) | \ > + ((num<<8)&0xff0000) | \ > + ((num>>8)&0xff00) | \ > + ((num<<24)&0xff000000) > + > + > +/* Shader code > + * void main() > +{ > + > + float x = some_input; > + for (unsigned i = 0; i < 1000000; i++) > + x = sin(x); > + > + u[0] = 42u; > +} > +*/ > + > +static uint32_t shader_bin[] = { > + SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf), > + SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf), > + SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e), > + SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf) > +}; > + > +#define CODE_OFFSET 512 > +#define DATA_OFFSET 1024 > + > + > int suite_basic_tests_init(void) > { > struct amdgpu_gpu_info gpu_info = {0}; > @@ -1386,3 +1442,211 @@ static void amdgpu_userptr_test(void) > > wait(NULL); > } > + > +static void amdgpu_sync_dependency_test(void) > +{ > + amdgpu_context_handle context_handle[2]; > + amdgpu_bo_handle ib_result_handle; > + void *ib_result_cpu; > + uint64_t ib_result_mc_address; > + struct amdgpu_cs_request ibs_request; > + struct amdgpu_cs_ib_info ib_info; > + struct amdgpu_cs_fence fence_status; > + uint32_t expired; > + int i, j, r, instance; > + amdgpu_bo_list_handle bo_list; > + amdgpu_va_handle va_handle; > + static uint32_t *ptr; > + uint64_t seq_no; > + > + CU_ASSERT_EQUAL(r, 0); > + > + r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]); > + 
CU_ASSERT_EQUAL(r, 0); > + r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]); > + CU_ASSERT_EQUAL(r, 0); > + > + r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096, > + AMDGPU_GEM_DOMAIN_GTT, 0, > + &ib_result_handle, &ib_result_cpu, > + &ib_result_mc_address, &va_handle); > + CU_ASSERT_EQUAL(r, 0); > + > + r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, > + &bo_list); > + CU_ASSERT_EQUAL(r, 0); > + > + ptr = ib_result_cpu; > + i = 0; > + > + memcpy(ptr + CODE_OFFSET , shader_bin, sizeof(shader_bin)); > + > + /* Dispatch minimal init config and verify it's executed */ > + ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1); > + ptr[i++] = 0x80000000; > + ptr[i++] = 0x80000000; > + > + ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0); > + ptr[i++] = 0x80000000; > + > + > + /* Program compute regs */ > + ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); > + ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; > + ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8; > + ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40; > + > + > + ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); > + ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START; > + /* > + * 002c0040 COMPUTE_PGM_RSRC1 <- VGPRS = 0 > + SGPRS = 1 > + PRIORITY = 0 > + FLOAT_MODE = 192 (0xc0) > + PRIV = 0 > + DX10_CLAMP = 1 > + DEBUG_MODE = 0 > + IEEE_MODE = 0 > + BULKY = 0 > + CDBG_USER = 0 > + * > + */ > + ptr[i++] = 0x002c0040; > + > + > + /* > + * 00000010 COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0 > + USER_SGPR = 8 > + TRAP_PRESENT = 0 > + TGID_X_EN = 0 > + TGID_Y_EN = 0 > + TGID_Z_EN = 0 > + TG_SIZE_EN = 0 > + TIDIG_COMP_CNT = 0 > + EXCP_EN_MSB = 0 > + LDS_SIZE = 0 > + EXCP_EN = 0 > + * > + */ > + ptr[i++] = 0x00000010; > + > + > +/* > + * 00000100 COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100) > + WAVESIZE = 0 > + * > + */ > + ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); > + ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START; > + ptr[i++] = 0x00000100; > + > + ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 
> + ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START; > + ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4); > + ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32; > + > + ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); > + ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START; > + ptr[i++] = 0; > + > + ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3); > + ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START; > + ptr[i++] = 1; > + ptr[i++] = 1; > + ptr[i++] = 1; > + > + > + /* Dispatch */ > + ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); > + ptr[i++] = 1; > + ptr[i++] = 1; > + ptr[i++] = 1; > + ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */ > + > + > + while (i & 7) > + ptr[i++] = 0xffff1000; /* type3 nop packet */ > + > + memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); > + ib_info.ib_mc_address = ib_result_mc_address; > + ib_info.size = i; > + > + memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); > + ibs_request.ip_type = AMDGPU_HW_IP_GFX; > + ibs_request.ring = 0; > + ibs_request.number_of_ibs = 1; > + ibs_request.ibs = &ib_info; > + ibs_request.resources = bo_list; > + ibs_request.fence_info.handle = NULL; > + > + r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request, 1); > + CU_ASSERT_EQUAL(r, 0); > + seq_no = ibs_request.seq_no; > + > + > + > + /* Prepare second command with dependency on the first */ > + j = i; > + ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3); > + ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; > + ptr[i++] = 0xfffffffc & ib_result_mc_address + DATA_OFFSET * 4; > + ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32; > + ptr[i++] = 99; > + > + while (i & 7) > + ptr[i++] = 0xffff1000; /* type3 nop packet */ > + > + memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); > + ib_info.ib_mc_address = ib_result_mc_address + j * 4; > + ib_info.size = i - j; > + > + memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); > + ibs_request.ip_type 
= AMDGPU_HW_IP_GFX; > + ibs_request.ring = 0; > + ibs_request.number_of_ibs = 1; > + ibs_request.ibs = &ib_info; > + ibs_request.resources = bo_list; > + ibs_request.fence_info.handle = NULL; > + > + ibs_request.number_of_dependencies = 1; > + > + ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies)); > + ibs_request.dependencies[0].context = context_handle[1]; > + ibs_request.dependencies[0].ip_instance = 0; > + ibs_request.dependencies[0].ring = 0; > + ibs_request.dependencies[0].fence = seq_no; > + > + > + r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request, 1); > + CU_ASSERT_EQUAL(r, 0); > + > + > + memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence)); > + fence_status.context = context_handle[0]; > + fence_status.ip_type = AMDGPU_HW_IP_GFX; > + fence_status.ip_instance = 0; > + fence_status.ring = 0; > + fence_status.fence = ibs_request.seq_no; > + > + r = amdgpu_cs_query_fence_status(&fence_status, > + AMDGPU_TIMEOUT_INFINITE,0, &expired); > + CU_ASSERT_EQUAL(r, 0); > + > + /* Expect the second command to wait for shader to complete */ > + CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99); > + > + r = amdgpu_bo_list_destroy(bo_list); > + CU_ASSERT_EQUAL(r, 0); > + > + r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, > + ib_result_mc_address, 4096); > + CU_ASSERT_EQUAL(r, 0); > + > + r = amdgpu_cs_ctx_free(context_handle[0]); > + CU_ASSERT_EQUAL(r, 0); > + r = amdgpu_cs_ctx_free(context_handle[1]); > + CU_ASSERT_EQUAL(r, 0); > + > + free(ibs_request.dependencies); > +}