add compute/gfx dispatch hang test for gfx9 Signed-off-by: Flora Cui <flora.cui@xxxxxxx> --- tests/amdgpu/amdgpu_test.c | 12 +++++++ tests/amdgpu/amdgpu_test.h | 1 + tests/amdgpu/basic_tests.c | 67 ++++++++++++++++++++++++++++------- tests/amdgpu/deadlock_tests.c | 14 ++++++++ 4 files changed, 81 insertions(+), 13 deletions(-) diff --git a/tests/amdgpu/amdgpu_test.c b/tests/amdgpu/amdgpu_test.c index 94bc3056..3ac9d8d2 100644 --- a/tests/amdgpu/amdgpu_test.c +++ b/tests/amdgpu/amdgpu_test.c @@ -460,6 +460,18 @@ static void amdgpu_disable_suites() "illegal mem access test (set amdgpu.vm_fault_stop=2)", CU_FALSE)) fprintf(stderr, "test deactivation failed - %s\n", CU_get_error_msg()); + /* This test was ran on GFX9 only */ + //if (family_id < AMDGPU_FAMILY_AI || family_id > AMDGPU_FAMILY_RV) + if (amdgpu_set_test_active(DEADLOCK_TESTS_STR, + "gfx ring bad dispatch test (set amdgpu.lockup_timeout=50)", CU_FALSE)) + fprintf(stderr, "test deactivation failed - %s\n", CU_get_error_msg()); + + /* This test was ran on GFX9 only */ + //if (family_id < AMDGPU_FAMILY_AI || family_id > AMDGPU_FAMILY_RV) + if (amdgpu_set_test_active(DEADLOCK_TESTS_STR, + "compute ring bad dispatch test (set amdgpu.lockup_timeout=50)", CU_FALSE)) + fprintf(stderr, "test deactivation failed - %s\n", CU_get_error_msg()); + if (amdgpu_set_test_active(BO_TESTS_STR, "Metadata", CU_FALSE)) fprintf(stderr, "test deactivation failed - %s\n", CU_get_error_msg()); diff --git a/tests/amdgpu/amdgpu_test.h b/tests/amdgpu/amdgpu_test.h index 0cb6ee98..2b01bf41 100644 --- a/tests/amdgpu/amdgpu_test.h +++ b/tests/amdgpu/amdgpu_test.h @@ -241,6 +241,7 @@ CU_BOOL suite_syncobj_timeline_tests_enable(void); */ extern CU_TestInfo syncobj_timeline_tests[]; +void amdgpu_dispatch_hang_helper(amdgpu_device_handle device_handle, uint32_t ip_type); /** * Helper functions diff --git a/tests/amdgpu/basic_tests.c b/tests/amdgpu/basic_tests.c index a57dcbb4..71c9220d 100644 --- a/tests/amdgpu/basic_tests.c +++ b/tests/amdgpu/basic_tests.c @@ -311,7 +311,8 @@ static uint32_t shader_bin[] = { enum cs_type { CS_BUFFERCLEAR, - CS_BUFFERCOPY + CS_BUFFERCOPY, + CS_HANG }; static const uint32_t bufferclear_cs_shader_gfx9[] = { @@ -473,6 +474,14 @@ static const uint32_t cached_cmd_gfx9[] = { 0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0 }; +unsigned int memcpy_ps_hang[] = { + 0xFFFFFFFF, 0xBEFE0A7E, 0xBEFC0304, 0xC0C20100, + 0xC0800300, 0xC8080000, 0xC80C0100, 0xC8090001, + 0xC80D0101, 0xBF8C007F, 0xF0800F00, 0x00010002, + 0xBEFE040C, 0xBF8C0F70, 0xBF800000, 0xBF800000, + 0xF800180F, 0x03020100, 0xBF810000 +}; + int amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size, unsigned alignment, unsigned heap, uint64_t alloc_flags, uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu, @@ -2189,6 +2198,10 @@ static int amdgpu_dispatch_load_cs_shader(uint8_t *ptr, shader = buffercopy_cs_shader_gfx9; shader_size = sizeof(buffercopy_cs_shader_gfx9); break; + case CS_HANG: + shader = memcpy_ps_hang; + shader_size = sizeof(memcpy_ps_hang); + break; default: return -1; break; @@ -2409,7 +2422,8 @@ static void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle, static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle, uint32_t ip_type, - uint32_t ring) + uint32_t ring, + int hang) { amdgpu_context_handle context_handle; amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4]; @@ -2425,7 +2439,8 @@ static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle, int bo_cmd_size = 4096; struct amdgpu_cs_request ibs_request = {0}; struct amdgpu_cs_ib_info ib_info= {0}; - uint32_t expired; + uint32_t expired, hang_state, hangs; + enum cs_type cs_type; amdgpu_bo_list_handle bo_list; struct amdgpu_cs_fence fence_status = {0}; @@ -2446,7 +2461,8 @@ static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle, CU_ASSERT_EQUAL(r, 0); memset(ptr_shader, 0, bo_shader_size); - r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCOPY ); + cs_type = hang ? CS_HANG : CS_BUFFERCOPY; + r = amdgpu_dispatch_load_cs_shader(ptr_shader, cs_type); CU_ASSERT_EQUAL(r, 0); r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, @@ -2532,14 +2548,21 @@ static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle, r = amdgpu_cs_query_fence_status(&fence_status, AMDGPU_TIMEOUT_INFINITE, 0, &expired); - CU_ASSERT_EQUAL(r, 0); - CU_ASSERT_EQUAL(expired, true); - /* verify if memcpy test result meets with expected */ - i = 0; - while(i < bo_dst_size) { - CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]); - i++; + if (!hang) { + CU_ASSERT_EQUAL(r, 0); + CU_ASSERT_EQUAL(expired, true); + + /* verify if memcpy test result meets with expected */ + i = 0; + while(i < bo_dst_size) { + CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]); + i++; + } + } else { + r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs); + CU_ASSERT_EQUAL(r, 0); + CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET); } r = amdgpu_bo_list_destroy(bo_list); @@ -2573,7 +2596,7 @@ static void amdgpu_compute_dispatch_test(void) for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id); - amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id); + amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id, 0); } } @@ -2590,7 +2613,25 @@ static void amdgpu_gfx_dispatch_test(void) for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id); - amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id); + amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id, 0); + } +} + +void amdgpu_dispatch_hang_helper(amdgpu_device_handle device_handle, uint32_t ip_type) +{ + int r; + struct drm_amdgpu_info_hw_ip info; + uint32_t ring_id; + + r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info); + CU_ASSERT_EQUAL(r, 0); + if (!info.available_rings) + printf("SKIP ... as there's no ring for ip %d\n", ip_type); + + for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { + amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0); + amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 1); + amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0); } } diff --git a/tests/amdgpu/deadlock_tests.c b/tests/amdgpu/deadlock_tests.c index 7d028829..61342d1a 100644 --- a/tests/amdgpu/deadlock_tests.c +++ b/tests/amdgpu/deadlock_tests.c @@ -114,6 +114,8 @@ static void amdgpu_deadlock_compute(void); static void amdgpu_illegal_reg_access(); static void amdgpu_illegal_mem_access(); static void amdgpu_deadlock_sdma(void); +static void amdgpu_dispatch_hang_gfx(void); +static void amdgpu_dispatch_hang_compute(void); CU_BOOL suite_deadlock_tests_enable(void) { @@ -188,6 +190,8 @@ CU_TestInfo deadlock_tests[] = { { "sdma ring block test (set amdgpu.lockup_timeout=50)", amdgpu_deadlock_sdma }, { "illegal reg access test", amdgpu_illegal_reg_access }, { "illegal mem access test (set amdgpu.vm_fault_stop=2)", amdgpu_illegal_mem_access }, + { "gfx ring bad dispatch test (set amdgpu.lockup_timeout=50)", amdgpu_dispatch_hang_gfx }, + { "compute ring bad dispatch test (set amdgpu.lockup_timeout=50,50)", amdgpu_dispatch_hang_compute }, CU_TEST_INFO_NULL, }; @@ -488,3 +492,13 @@ static void amdgpu_illegal_mem_access() { bad_access_helper(0); } + +static void amdgpu_dispatch_hang_gfx(void) +{ + amdgpu_dispatch_hang_helper(device_handle, AMDGPU_HW_IP_GFX); +} + +static void amdgpu_dispatch_hang_compute(void) +{ + amdgpu_dispatch_hang_helper(device_handle, AMDGPU_HW_IP_COMPUTE); +} -- 2.17.1 _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/dri-devel