On Wed, Jun 5, 2024 at 7:27 PM Jay Cornwall <jay.cornwall@xxxxxxx> wrote: > > In commit 6d1878882d2d > ("drm/amdkfd: gfx12 context save/restore trap handler fixes") the > following fix was introduced but incorrectly restricted to gfx12. > The same issue and a corresponding fix apply to gfx10 and gfx11. > > Do not overwrite TRAPSTS.{SAVECTX,HOST_TRAP} when restoring this > register. Both of these fields can assert while the wavefront is > running the trap handler. > > Signed-off-by: Jay Cornwall <jay.cornwall@xxxxxxx> > Cc: Lancelot Six <lancelot.six@xxxxxxx> Acked-by: Alex Deucher <alexander.deucher@xxxxxxx> > --- > .../gpu/drm/amd/amdkfd/cwsr_trap_handler.h | 16 +++++--- > .../amd/amdkfd/cwsr_trap_handler_gfx10.asm | 38 ++++++++++++++----- > 2 files changed, 38 insertions(+), 16 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h > index 665122d1bbbd..02f7ba8c93cd 100644 > --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h > +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h > @@ -1136,7 +1136,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { > 0x705d0000, 0x807c817c, > 0x8070ff70, 0x00000080, > 0xbf0a7b7c, 0xbf85fff8, > - 0xbf82013d, 0xbef4037e, > + 0xbf82013f, 0xbef4037e, > 0x8775ff7f, 0x0000ffff, > 0x8875ff75, 0x00040000, > 0xbef60380, 0xbef703ff, > @@ -1275,7 +1275,8 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { > 0x80788478, 0xbf8c0000, > 0xb9eef815, 0xbefc036f, > 0xbefe0370, 0xbeff0371, > - 0xb9f9f816, 0xb9fbf803, > + 0xb9f9f816, 0xb9fb4803, > + 0x907b8b7b, 0xb9fba2c3, > 0xb9f3f801, 0xb96e3a05, > 0x806e816e, 0xbf0d9972, > 0xbf850002, 0x8f6e896e, > @@ -2544,7 +2545,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { > 0xe0704000, 0x705d0000, > 0x807c817c, 0x8070ff70, > 0x00000080, 0xbf0a7b7c, > - 0xbf85fff8, 0xbf820134, > + 0xbf85fff8, 0xbf820136, > 0xbef4037e, 0x8775ff7f, > 0x0000ffff, 0x8875ff75, > 0x00040000, 0xbef60380, > @@ -2683,7 +2684,8 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { > 0xf0000000, 0x80788478, > 0xbf8c0000, 0xb9eef815, > 0xbefc036f, 0xbefe0370, > - 0xbeff0371, 0xb9fbf803, > + 0xbeff0371, 0xb9fb4803, > + 0x907b8b7b, 0xb9fba2c3, > 0xb9f3f801, 0xb96e3a05, > 0x806e816e, 0xbf0d9972, > 0xbf850002, 0x8f6e896e, > @@ -2981,7 +2983,7 @@ static const uint32_t cwsr_trap_gfx11_hex[] = { > 0x701d0000, 0x807d817d, > 0x8070ff70, 0x00000080, > 0xbf0a7b7d, 0xbfa2fff8, > - 0xbfa0013f, 0xbef4007e, > + 0xbfa00143, 0xbef4007e, > 0x8b75ff7f, 0x0000ffff, > 0x8c75ff75, 0x00040000, > 0xbef60080, 0xbef700ff, > @@ -3123,7 +3125,9 @@ static const uint32_t cwsr_trap_gfx11_hex[] = { > 0x80788478, 0xbf890000, > 0xb96ef815, 0xbefd006f, > 0xbefe0070, 0xbeff0071, > - 0xb97bf803, 0xb973f801, > + 0xb97b4803, 0x857b8b7b, > + 0xb97b22c3, 0x857b867b, > + 0xb97b7443, 0xb973f801, > 0xb8ee3b05, 0x806e816e, > 0xbf0d9972, 0xbfa20002, > 0x846e896e, 0xbfa00001, > diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm > index ac3702b8e3c4..44772eec9ef4 100644 > --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm > +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm > @@ -119,9 +119,12 @@ var SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT = 7 > var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK = 0x100 > var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT = 8 > var SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK = 0x800 > +var SQ_WAVE_TRAPSTS_ILLEGAL_INST_SHIFT = 11 > var SQ_WAVE_TRAPSTS_EXCP_HI_MASK = 0x7000 > #if ASIC_FAMILY >= CHIP_PLUM_BONITO > +var SQ_WAVE_TRAPSTS_HOST_TRAP_SHIFT = 16 > var SQ_WAVE_TRAPSTS_WAVE_START_MASK = 0x20000 > +var SQ_WAVE_TRAPSTS_WAVE_START_SHIFT = 17 > var SQ_WAVE_TRAPSTS_WAVE_END_MASK = 0x40000 > var SQ_WAVE_TRAPSTS_TRAP_AFTER_INST_MASK = 0x100000 > #endif > @@ -137,14 +140,23 @@ var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x003F8000 > > var SQ_WAVE_MODE_DEBUG_EN_MASK = 0x800 > > +var S_TRAPSTS_RESTORE_PART_1_SIZE = SQ_WAVE_TRAPSTS_SAVECTX_SHIFT > +var S_TRAPSTS_RESTORE_PART_2_SHIFT = SQ_WAVE_TRAPSTS_ILLEGAL_INST_SHIFT > + > #if ASIC_FAMILY < CHIP_PLUM_BONITO > var S_TRAPSTS_NON_MASKABLE_EXCP_MASK = SQ_WAVE_TRAPSTS_MEM_VIOL_MASK|SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK > +var S_TRAPSTS_RESTORE_PART_2_SIZE = 32 - S_TRAPSTS_RESTORE_PART_2_SHIFT > +var S_TRAPSTS_RESTORE_PART_3_SHIFT = 0 > +var S_TRAPSTS_RESTORE_PART_3_SIZE = 0 > #else > var S_TRAPSTS_NON_MASKABLE_EXCP_MASK = SQ_WAVE_TRAPSTS_MEM_VIOL_MASK |\ > SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK |\ > SQ_WAVE_TRAPSTS_WAVE_START_MASK |\ > SQ_WAVE_TRAPSTS_WAVE_END_MASK |\ > SQ_WAVE_TRAPSTS_TRAP_AFTER_INST_MASK > +var S_TRAPSTS_RESTORE_PART_2_SIZE = SQ_WAVE_TRAPSTS_HOST_TRAP_SHIFT - SQ_WAVE_TRAPSTS_ILLEGAL_INST_SHIFT > +var S_TRAPSTS_RESTORE_PART_3_SHIFT = SQ_WAVE_TRAPSTS_WAVE_START_SHIFT > +var S_TRAPSTS_RESTORE_PART_3_SIZE = 32 - S_TRAPSTS_RESTORE_PART_3_SHIFT > #endif > var S_TRAPSTS_HWREG = HW_REG_TRAPSTS > var S_TRAPSTS_SAVE_CONTEXT_MASK = SQ_WAVE_TRAPSTS_SAVECTX_MASK > @@ -157,6 +169,7 @@ var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK = 0x20 > var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK = 0x40 > var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT = 6 > var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK = 0x80 > +var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT = 7 > var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK = 0x100 > var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT = 8 > var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK = 0x200 > @@ -173,6 +186,11 @@ var S_TRAPSTS_NON_MASKABLE_EXCP_MASK = SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK |\ > SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK |\ > SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK |\ > SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK > +var S_TRAPSTS_RESTORE_PART_1_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT > +var S_TRAPSTS_RESTORE_PART_2_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT > +var S_TRAPSTS_RESTORE_PART_2_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT - SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT > +var S_TRAPSTS_RESTORE_PART_3_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT > +var S_TRAPSTS_RESTORE_PART_3_SIZE = 32 - S_TRAPSTS_RESTORE_PART_3_SHIFT > var BARRIER_STATE_SIGNAL_OFFSET = 16 > var BARRIER_STATE_VALID_OFFSET = 0 > #endif > @@ -1386,17 +1404,17 @@ L_SKIP_BARRIER_RESTORE: > s_setreg_b32 hwreg(HW_REG_SHADER_XNACK_MASK), s_restore_xnack_mask > #endif > > -#if ASIC_FAMILY < CHIP_GFX12 > - s_setreg_b32 hwreg(S_TRAPSTS_HWREG), s_restore_trapsts > -#else > - // EXCP_FLAG_PRIV.SAVE_CONTEXT and HOST_TRAP may have changed. > + // {TRAPSTS/EXCP_FLAG_PRIV}.SAVE_CONTEXT and HOST_TRAP may have changed. > // Only restore the other fields to avoid clobbering them. > - s_setreg_b32 hwreg(S_TRAPSTS_HWREG, 0, SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT), s_restore_trapsts > - s_lshr_b32 s_restore_trapsts, s_restore_trapsts, SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT > - s_setreg_b32 hwreg(S_TRAPSTS_HWREG, SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT, 1), s_restore_trapsts > - s_lshr_b32 s_restore_trapsts, s_restore_trapsts, SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT - SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT > - s_setreg_b32 hwreg(S_TRAPSTS_HWREG, SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT, 32 - SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT), s_restore_trapsts > -#endif > + s_setreg_b32 hwreg(S_TRAPSTS_HWREG, 0, S_TRAPSTS_RESTORE_PART_1_SIZE), s_restore_trapsts > + s_lshr_b32 s_restore_trapsts, s_restore_trapsts, S_TRAPSTS_RESTORE_PART_2_SHIFT > + s_setreg_b32 hwreg(S_TRAPSTS_HWREG, S_TRAPSTS_RESTORE_PART_2_SHIFT, S_TRAPSTS_RESTORE_PART_2_SIZE), s_restore_trapsts > + > +if S_TRAPSTS_RESTORE_PART_3_SIZE > 0 > + s_lshr_b32 s_restore_trapsts, s_restore_trapsts, S_TRAPSTS_RESTORE_PART_3_SHIFT - S_TRAPSTS_RESTORE_PART_2_SHIFT > + s_setreg_b32 hwreg(S_TRAPSTS_HWREG, S_TRAPSTS_RESTORE_PART_3_SHIFT, S_TRAPSTS_RESTORE_PART_3_SIZE), s_restore_trapsts > +end > + > s_setreg_b32 hwreg(HW_REG_MODE), s_restore_mode > > // Restore trap temporaries 4-11, 13 initialized by SPI debug dispatch logic > -- > 2.34.1 >