Enable host trap. Signed-off-by: James Zhu <James.Zhu@xxxxxxx> --- .../gpu/drm/amd/amdkfd/cwsr_trap_handler.h | 63 +++++++++++-------- .../drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm | 24 ++++--- 2 files changed, 52 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h index d1caaf0e6a7c..af1f678790e7 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -274,14 +274,14 @@ static const uint32_t cwsr_trap_gfx8_hex[] = { static const uint32_t cwsr_trap_gfx9_hex[] = { - 0xbf820001, 0xbf820258, + 0xbf820001, 0xbf82025e, 0xb8f8f802, 0x8978ff78, 0x00020006, 0xb8fbf803, 0x866eff78, 0x00002000, 0xbf840009, 0x866eff6d, 0x00ff0000, 0xbf85001e, 0x866eff7b, 0x00000400, - 0xbf850055, 0xbf8e0010, + 0xbf85005b, 0xbf8e0010, 0xb8fbf803, 0xbf82fffa, 0x866eff7b, 0x03c00900, 0xbf850015, 0x866eff7b, @@ -294,7 +294,7 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { 0xbf850007, 0xb8eef801, 0x866eff6e, 0x00000800, 0xbf850003, 0x866eff7b, - 0x00000400, 0xbf85003a, + 0x00000400, 0xbf850040, 0xb8faf807, 0x867aff7a, 0x001f8000, 0x8e7a8b7a, 0x8977ff77, 0xfc000000, @@ -303,13 +303,16 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { 0xb8fbf813, 0x8efa887a, 0xbf0d8f7b, 0xbf840002, 0x877bff7b, 0xffff0000, - 0xc0031bbd, 0x00000010, - 0xbf8cc07f, 0x8e6e976e, - 0x8977ff77, 0x00800000, - 0x87776e77, 0xc0071bbd, - 0x00000000, 0xbf8cc07f, + 0xc0031c3d, 0x00000010, + 0xc0071bbd, 0x00000000, 0xc0071ebd, 0x00000008, - 0xbf8cc07f, 0x86ee6e6e, + 0xbf8cc07f, 0x8671ff6d, + 0x01000000, 0xbf840004, + 0x92f1ff70, 0x00010001, + 0xbf840016, 0xbf820005, + 0x86708170, 0x8e709770, + 0x8977ff77, 0x00800000, + 0x87777077, 0x86ee6e6e, 0xbf840001, 0xbe801d6e, 0x866eff6d, 0x01ff0000, 0xbf850005, 0x8778ff78, @@ -1098,14 +1101,14 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { }; static const uint32_t cwsr_trap_arcturus_hex[] = { - 0xbf820001, 0xbf8202d4, + 0xbf820001, 0xbf8202da, 0xb8f8f802, 0x8978ff78, 0x00020006, 0xb8fbf803, 0x866eff78, 0x00002000, 0xbf840009, 0x866eff6d, 0x00ff0000, 0xbf85001e, 0x866eff7b, 0x00000400, - 0xbf850055, 0xbf8e0010, + 0xbf85005b, 0xbf8e0010, 0xb8fbf803, 0xbf82fffa, 0x866eff7b, 0x03c00900, 0xbf850015, 0x866eff7b, @@ -1118,7 +1121,7 @@ static const uint32_t cwsr_trap_arcturus_hex[] = { 0xbf850007, 0xb8eef801, 0x866eff6e, 0x00000800, 0xbf850003, 0x866eff7b, - 0x00000400, 0xbf85003a, + 0x00000400, 0xbf850040, 0xb8faf807, 0x867aff7a, 0x001f8000, 0x8e7a8b7a, 0x8977ff77, 0xfc000000, @@ -1127,13 +1130,16 @@ static const uint32_t cwsr_trap_arcturus_hex[] = { 0xb8fbf813, 0x8efa887a, 0xbf0d8f7b, 0xbf840002, 0x877bff7b, 0xffff0000, - 0xc0031bbd, 0x00000010, - 0xbf8cc07f, 0x8e6e976e, - 0x8977ff77, 0x00800000, - 0x87776e77, 0xc0071bbd, - 0x00000000, 0xbf8cc07f, + 0xc0031c3d, 0x00000010, + 0xc0071bbd, 0x00000000, 0xc0071ebd, 0x00000008, - 0xbf8cc07f, 0x86ee6e6e, + 0xbf8cc07f, 0x8671ff6d, + 0x01000000, 0xbf840004, + 0x92f1ff70, 0x00010001, + 0xbf840016, 0xbf820005, + 0x86708170, 0x8e709770, + 0x8977ff77, 0x00800000, + 0x87777077, 0x86ee6e6e, 0xbf840001, 0xbe801d6e, 0x866eff6d, 0x01ff0000, 0xbf850005, 0x8778ff78, @@ -1578,14 +1584,14 @@ static const uint32_t cwsr_trap_arcturus_hex[] = { }; static const uint32_t cwsr_trap_aldebaran_hex[] = { - 0xbf820001, 0xbf8202df, + 0xbf820001, 0xbf8202e5, 0xb8f8f802, 0x8978ff78, 0x00020006, 0xb8fbf803, 0x866eff78, 0x00002000, 0xbf840009, 0x866eff6d, 0x00ff0000, 0xbf85001e, 0x866eff7b, 0x00000400, - 0xbf850055, 0xbf8e0010, + 0xbf85005b, 0xbf8e0010, 0xb8fbf803, 0xbf82fffa, 0x866eff7b, 0x03c00900, 0xbf850015, 0x866eff7b, @@ -1598,7 +1604,7 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = { 0xbf850007, 0xb8eef801, 0x866eff6e, 0x00000800, 0xbf850003, 0x866eff7b, - 0x00000400, 0xbf85003a, + 0x00000400, 0xbf850040, 0xb8faf807, 0x867aff7a, 0x001f8000, 0x8e7a8b7a, 0x8977ff77, 0xfc000000, @@ -1607,13 +1613,16 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = { 0xb8fbf813, 0x8efa887a, 0xbf0d8f7b, 0xbf840002, 0x877bff7b, 0xffff0000, - 0xc0031bbd, 0x00000010, - 0xbf8cc07f, 0x8e6e976e, - 0x8977ff77, 0x00800000, - 0x87776e77, 0xc0071bbd, - 0x00000000, 0xbf8cc07f, + 0xc0031c3d, 0x00000010, + 0xc0071bbd, 0x00000000, 0xc0071ebd, 0x00000008, - 0xbf8cc07f, 0x86ee6e6e, + 0xbf8cc07f, 0x8671ff6d, + 0x01000000, 0xbf840004, + 0x92f1ff70, 0x00010001, + 0xbf840016, 0xbf820005, + 0x86708170, 0x8e709770, + 0x8977ff77, 0x00800000, + 0x87777077, 0x86ee6e6e, 0xbf840001, 0xbe801d6e, 0x866eff6d, 0x01ff0000, 0xbf850005, 0x8778ff78, diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm index bb26338204f4..991fe6bb1726 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm @@ -104,6 +104,10 @@ var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x1F8000 var SQ_WAVE_MODE_DEBUG_EN_MASK = 0x800 +var TMA_HOST_TRAP_EN_SHIFT = 1 +var TMA_HOST_TRAP_EN_SIZE = 1 +var TMA_HOST_TRAP_EN_BFE = (TMA_HOST_TRAP_EN_SHIFT | (TMA_HOST_TRAP_EN_SIZE << 16)) + var TTMP_SAVE_RCNT_FIRST_REPLAY_SHIFT = 26 // bits [31:26] unused by SPI debug data var TTMP_SAVE_RCNT_FIRST_REPLAY_MASK = 0xFC000000 var TTMP_DEBUG_TRAP_ENABLED_SHIFT = 23 @@ -288,17 +292,21 @@ L_FETCH_2ND_TRAP: s_or_b32 ttmp15, ttmp15, 0xFFFF0000 L_NO_SIGN_EXTEND_TMA: - s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 glc:1 // debug trap enabled flag - s_waitcnt lgkmcnt(0) - s_lshl_b32 ttmp2, ttmp2, TTMP_DEBUG_TRAP_ENABLED_SHIFT - s_andn2_b32 s_save_ib_sts, s_save_ib_sts, TTMP_DEBUG_TRAP_ENABLED_MASK - s_or_b32 s_save_ib_sts, s_save_ib_sts, ttmp2 - + s_load_dword ttmp4, [ttmp14, ttmp15], 0x10 glc:1 // enable flags from 1st level TMA s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 glc:1 // second-level TBA - s_waitcnt lgkmcnt(0) s_load_dwordx2 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 glc:1 // second-level TMA s_waitcnt lgkmcnt(0) - + s_and_b32 ttmp5, s_save_pc_hi, S_SAVE_PC_HI_HT_MASK // host trap request + s_cbranch_scc0 L_NOT_HT + s_bfe_u32 ttmp5, ttmp4, TMA_HOST_TRAP_EN_BFE // extract host_trap_en to ttmp5[0] + s_cbranch_scc0 L_EXIT_TRAP // HT requested, but host traps not enabled + s_branch L_GOTO_2ND_TRAP +L_NOT_HT: + s_and_b32 ttmp4, ttmp4, 0x1 // debug_enable bit left over + s_lshl_b32 ttmp4, ttmp4, TTMP_DEBUG_TRAP_ENABLED_SHIFT + s_andn2_b32 s_save_ib_sts, s_save_ib_sts, TTMP_DEBUG_TRAP_ENABLED_MASK + s_or_b32 s_save_ib_sts, s_save_ib_sts, ttmp4 +L_GOTO_2ND_TRAP: s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3] s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler not been set s_setpc_b64 [ttmp2, ttmp3] // jump to second-level trap handler -- 2.25.1