From: Yong Zhao <yong.zhao@xxxxxxx> The problem happens on Raven and Carrizo. The context save/restore trap handler must not clear the high bits of PC_HI before the bits of IB_STS have been extracted. Move the masking of PC_HI (s_and_b32 with 0x0000ffff) in the restore path so that it runs after IB_STS has been written, in both the gfx8 and gfx9 handlers, and update the prebuilt hex arrays to match. The bug is not relevant to VEGA10 until we enable demand paging. Signed-off-by: Jay Cornwall <Jay.Cornwall at amd.com> Signed-off-by: Yong Zhao <yong.zhao at amd.com> Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com> --- drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h | 4 ++-- drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm | 3 +-- drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm | 3 +-- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h index a546a21..f68aef0 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -253,7 +253,6 @@ static const uint32_t cwsr_trap_gfx8_hex[] = { 0x00000072, 0x80728472, 0xc0211b7c, 0x00000072, 0x80728472, 0xbf8c007f, - 0x8671ff71, 0x0000ffff, 0xbefc0073, 0xbefe006e, 0xbeff006f, 0x867375ff, 0x000003ff, 0xb9734803, @@ -267,6 +266,7 @@ static const uint32_t cwsr_trap_gfx8_hex[] = { 0x8e738f73, 0x87767376, 0x8673ff74, 0x00800000, 0x8f739773, 0xb976f807, + 0x8671ff71, 0x0000ffff, 0x86fe7e7e, 0x86ea6a6a, 0xb974f802, 0xbf8a0000, 0x95807370, 0xbf810000, @@ -530,7 +530,6 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { 0x00000078, 0x80788478, 0xc0211cfa, 0x00000078, 0x80788478, 0xbf8cc07f, - 0x866dff6d, 0x0000ffff, 0xbefc006f, 0xbefe007a, 0xbeff007b, 0x866f71ff, 0x000003ff, 0xb96f4803, @@ -554,6 +553,7 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { 0x8e6f8f6f, 0x876e6f6e, 0x866fff70, 0x00800000, 0x8f6f976f, 0xb96ef807, + 0x866dff6d, 0x0000ffff, 0x86fe7e7e, 0x86ea6a6a, 0xb970f802, 0xbf8a0000, 0x95806f6c, 0xbf810000, diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm 
b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm index 658a4c6..a2a04bb 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm @@ -1015,8 +1015,6 @@ end s_waitcnt lgkmcnt(0) //from now on, it is safe to restore STATUS and IB_STS - s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS - //for normal save & restore, the saved PC points to the next inst to execute, no adjustment needs to be made, otherwise: if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 8 //pc[31:0]+8 //two back-to-back s_trap are used (first for save and second for restore) @@ -1052,6 +1050,7 @@ end s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_tmp + s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which is changed by previous salu diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm index 065f55a..998be96 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm @@ -1067,8 +1067,6 @@ end s_waitcnt lgkmcnt(0) //from now on, it is safe to restore STATUS and IB_STS - s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS - //for normal save & restore, the saved PC points to the next inst to execute, no adjustment needs to be made, otherwise: if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 8 //pc[31:0]+8 //two back-to-back s_trap 
are used (first for save and second for restore) @@ -1119,6 +1117,7 @@ end s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_tmp + s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which is changed by previous salu -- 2.7.4