[AMD Official Use Only - Internal Distribution Only] I reviewed and tested the patch series. Tested-by: Laurent Morichetti <laurent.morichetti@xxxxxxx> On 10/1/20, 11:34 AM, "Kuehling, Felix" <Felix.Kuehling@xxxxxxx> wrote: The series is Acked-by: Felix Kuehling <Felix.Kuehling@xxxxxxx> I'm hoping Laurent can give it a more thorough and informed R-b. Thanks, Felix Am 2020-10-01 um 2:24 p.m. schrieb Jay Cornwall: > ATC and MTYPE fields do not exist in gfx9 or later. > > Signed-off-by: Jay Cornwall <jay.cornwall@xxxxxxx> > Cc: Laurent Morichetti <laurent.morichetti@xxxxxxx> > --- > .../gpu/drm/amd/amdkfd/cwsr_trap_handler.h | 93 ++++++------------- > .../amd/amdkfd/cwsr_trap_handler_gfx10.asm | 28 +----- > .../drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm | 30 +----- > 3 files changed, 30 insertions(+), 121 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h > index affbca7c0050..aa2de525b2e0 100644 > --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h > +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h > @@ -274,7 +274,7 @@ static const uint32_t cwsr_trap_gfx8_hex[] = { > > > static const uint32_t cwsr_trap_gfx9_hex[] = { > -0xbf820001, 0xbf820248, > +0xbf820001, 0xbf820240, > 0xb8f8f802, 0x89788678, > 0xb8eef801, 0x866eff6e, > 0x00000800, 0xbf840003, > @@ -336,10 +336,6 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { > 0x0000ffff, 0x8775ff75, > 0x00040000, 0xbef60080, > 0xbef700ff, 0x00807fac, > -0x867aff7f, 0x08000000, > -0x8f7a837a, 0x87777a77, > -0x867aff7f, 0x70000000, > -0x8f7a817a, 0x87777a77, > 0xbef1007c, 0xbef00080, > 0xb8f02a05, 0x80708170, > 0x8e708a70, 0xb8fa1605, > @@ -566,15 +562,11 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { > 0x701d0300, 0x807c847c, > 0x8070ff70, 0x00000400, > 0xbf0a7b7c, 0xbf85ffef, > -0xbf9c0000, 0xbf8200da, > +0xbf9c0000, 0xbf8200cf, > 0xbef4007e, 0x8675ff7f, > 0x0000ffff, 0x8775ff75, > 0x00040000, 0xbef60080, > 0xbef700ff, 0x00807fac, > -0x866eff7f, 
0x08000000, > -0x8f6e836e, 0x87776e77, > -0x866eff7f, 0x70000000, > -0x8f6e816e, 0x87776e77, > 0x866eff7f, 0x04000000, > 0xbf84001e, 0xbefe00c1, > 0xbeff00c1, 0xb8ef4306, > @@ -669,18 +661,16 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { > 0x876e6f6e, 0x866fff6d, > 0x04000000, 0x8f6f9a6f, > 0x8e6f8f6f, 0x876e6f6e, > -0x866fff7a, 0x00800000, > -0x8f6f976f, 0xb96ef807, > -0x866dff6d, 0x0000ffff, > -0x86fe7e7e, 0x86ea6a6a, > -0x8f6e837a, 0xb96ee0c2, > -0xbf800002, 0xb97a0002, > -0xbf8a0000, 0x95806f6c, > -0xbf810000, 0x00000000, > +0xb96ef807, 0x866dff6d, > +0x0000ffff, 0x86fe7e7e, > +0x86ea6a6a, 0x8f6e837a, > +0xb96ee0c2, 0xbf800002, > +0xb97a0002, 0xbf8a0000, > +0xbe801f6c, 0xbf810000, > }; > > static const uint32_t cwsr_trap_nv1x_hex[] = { > -0xbf820001, 0xbf8201cd, > +0xbf820001, 0xbf8201c5, > 0xb0804004, 0xb978f802, > 0x8a788678, 0xb96ef801, > 0x876eff6e, 0x00000800, > @@ -740,10 +730,6 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { > 0x0000ffff, 0x8875ff75, > 0x00040000, 0xbef60380, > 0xbef703ff, 0x10807fac, > -0x877aff7f, 0x08000000, > -0x907a837a, 0x88777a77, > -0x877aff7f, 0x70000000, > -0x907a817a, 0x88777a77, > 0xbef1037c, 0xbef00380, > 0xb97302dc, 0x8f739973, > 0x8873737f, 0xb97bf816, > @@ -911,15 +897,11 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { > 0x705d0000, 0x807c817c, > 0x8070ff70, 0x00000080, > 0xbf0a7b7c, 0xbf85fff8, > -0xbf820151, 0xbef4037e, > +0xbf820146, 0xbef4037e, > 0x8775ff7f, 0x0000ffff, > 0x8875ff75, 0x00040000, > 0xbef60380, 0xbef703ff, > -0x10807fac, 0x876eff7f, > -0x08000000, 0x906e836e, > -0x88776e77, 0x876eff7f, > -0x70000000, 0x906e816e, > -0x88776e77, 0xb97202dc, > +0x10807fac, 0xb97202dc, > 0x8f729972, 0x8872727f, > 0x876eff7f, 0x04000000, > 0xbf840034, 0xbefe03c1, > @@ -1075,18 +1057,17 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { > 0x886e6f6e, 0x876fff6d, > 0x01000000, 0x906f986f, > 0x8f6f996f, 0x886e6f6e, > -0x876fff7a, 0x00800000, > -0x906f976f, 0xb9eef807, > -0x876dff6d, 0x0000ffff, > -0x87fe7e7e, 0x87ea6a6a, 
> -0xb9faf802, 0xbe80226c, > -0xbf810000, 0xbf9f0000, > +0xb9eef807, 0x876dff6d, > +0x0000ffff, 0x87fe7e7e, > +0x87ea6a6a, 0xb9faf802, > +0xbe80226c, 0xbf810000, > 0xbf9f0000, 0xbf9f0000, > 0xbf9f0000, 0xbf9f0000, > +0xbf9f0000, 0x00000000, > }; > > static const uint32_t cwsr_trap_arcturus_hex[] = { > -0xbf820001, 0xbf8202c4, > +0xbf820001, 0xbf8202bc, > 0xb8f8f802, 0x89788678, > 0xb8eef801, 0x866eff6e, > 0x00000800, 0xbf840003, > @@ -1148,11 +1129,7 @@ static const uint32_t cwsr_trap_arcturus_hex[] = { > 0x8675ff7f, 0x0000ffff, > 0x8775ff75, 0x00040000, > 0xbef60080, 0xbef700ff, > -0x00807fac, 0x867aff7f, > -0x08000000, 0x8f7a837a, > -0x87777a77, 0x867aff7f, > -0x70000000, 0x8f7a817a, > -0x87777a77, 0xbef1007c, > +0x00807fac, 0xbef1007c, > 0xbef00080, 0xb8f02a05, > 0x80708170, 0x8e708a70, > 0x8e708170, 0xb8fa1605, > @@ -1440,15 +1417,11 @@ static const uint32_t cwsr_trap_arcturus_hex[] = { > 0x701d0300, 0x807c847c, > 0x8070ff70, 0x00000400, > 0xbf0a7b7c, 0xbf85ffeb, > -0xbf9c0000, 0xbf820106, > +0xbf9c0000, 0xbf8200fb, > 0xbef4007e, 0x8675ff7f, > 0x0000ffff, 0x8775ff75, > 0x00040000, 0xbef60080, > 0xbef700ff, 0x00807fac, > -0x866eff7f, 0x08000000, > -0x8f6e836e, 0x87776e77, > -0x866eff7f, 0x70000000, > -0x8f6e816e, 0x87776e77, > 0x866eff7f, 0x04000000, > 0xbf84001f, 0xbefe00c1, > 0xbeff00c1, 0xb8ef4306, > @@ -1565,18 +1538,16 @@ static const uint32_t cwsr_trap_arcturus_hex[] = { > 0x876e6f6e, 0x866fff6d, > 0x04000000, 0x8f6f9a6f, > 0x8e6f8f6f, 0x876e6f6e, > -0x866fff7a, 0x00800000, > -0x8f6f976f, 0xb96ef807, > -0x866dff6d, 0x0000ffff, > -0x86fe7e7e, 0x86ea6a6a, > -0x8f6e837a, 0xb96ee0c2, > -0xbf800002, 0xb97a0002, > -0xbf8a0000, 0x95806f6c, > -0xbf810000, 0x00000000, > +0xb96ef807, 0x866dff6d, > +0x0000ffff, 0x86fe7e7e, > +0x86ea6a6a, 0x8f6e837a, > +0xb96ee0c2, 0xbf800002, > +0xb97a0002, 0xbf8a0000, > +0xbe801f6c, 0xbf810000, > }; > > static const uint32_t cwsr_trap_gfx10_hex[] = { > -0xbf820001, 0xbf8201cf, > +0xbf820001, 0xbf8201c7, > 0xb0804004, 0xb978f802, > 
0x8a788678, 0xb96ef801, > 0x876eff6e, 0x00000800, > @@ -1615,10 +1586,6 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { > 0x0000ffff, 0x8875ff75, > 0x00040000, 0xbef60380, > 0xbef703ff, 0x10807fac, > -0x877aff7f, 0x08000000, > -0x907a837a, 0x88777a77, > -0x877aff7f, 0x70000000, > -0x907a817a, 0x88777a77, > 0xbef1037c, 0xbef00380, > 0xb97302dc, 0x8f739973, > 0x8873737f, 0xbefe03c1, > @@ -1808,15 +1775,11 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { > 0x705d0000, 0x807c817c, > 0x8070ff70, 0x00000080, > 0xbf0a7b7c, 0xbf85fff8, > -0xbf82013c, 0xbef4037e, > +0xbf820134, 0xbef4037e, > 0x8775ff7f, 0x0000ffff, > 0x8875ff75, 0x00040000, > 0xbef60380, 0xbef703ff, > -0x10807fac, 0x876eff7f, > -0x08000000, 0x906e836e, > -0x88776e77, 0x876eff7f, > -0x70000000, 0x906e816e, > -0x88776e77, 0xb97202dc, > +0x10807fac, 0xb97202dc, > 0x8f729972, 0x8872727f, > 0x876eff7f, 0x04000000, > 0xbf840034, 0xbefe03c1, > diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm > index 5081f91190b8..c3344acdb094 100644 > --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm > +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm > @@ -35,8 +35,6 @@ > > var SINGLE_STEP_MISSED_WORKAROUND= 1//workaround for lost MODE.DEBUG_EN exception when SAVECTX raised > > -var SQ_WAVE_STATUS_INST_ATC_SHIFT= 23 > -var SQ_WAVE_STATUS_INST_ATC_MASK= 0x00800000 > var SQ_WAVE_STATUS_SPI_PRIO_MASK= 0x00000006 > var SQ_WAVE_STATUS_HALT_MASK= 0x2000 > > @@ -76,9 +74,6 @@ var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG= 0x00007FFF > > var SQ_WAVE_MODE_DEBUG_EN_MASK= 0x800 > > -var SQ_BUF_RSRC_WORD1_ATC_SHIFT= 24 > -var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT= 27 > - > // bits [31:24] unused by SPI debug data > var TTMP11_SAVE_REPLAY_W64H_SHIFT= 31 > var TTMP11_SAVE_REPLAY_W64H_MASK= 0x80000000 > @@ -90,10 +85,6 @@ var TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK= 0x7F000000 > var S_SAVE_BUF_RSRC_WORD1_STRIDE= 0x00040000 > var S_SAVE_BUF_RSRC_WORD3_MISC= 
0x10807FAC > > -var S_SAVE_SPI_INIT_ATC_MASK= 0x08000000 > -var S_SAVE_SPI_INIT_ATC_SHIFT= 27 > -var S_SAVE_SPI_INIT_MTYPE_MASK= 0x70000000 > -var S_SAVE_SPI_INIT_MTYPE_SHIFT= 28 > var S_SAVE_SPI_INIT_FIRST_WAVE_MASK= 0x04000000 > var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT= 26 > > @@ -130,10 +121,6 @@ var s_save_ttmps_hi= s_save_trapsts > var S_RESTORE_BUF_RSRC_WORD1_STRIDE= S_SAVE_BUF_RSRC_WORD1_STRIDE > var S_RESTORE_BUF_RSRC_WORD3_MISC= S_SAVE_BUF_RSRC_WORD3_MISC > > -var S_RESTORE_SPI_INIT_ATC_MASK= 0x08000000 > -var S_RESTORE_SPI_INIT_ATC_SHIFT= 27 > -var S_RESTORE_SPI_INIT_MTYPE_MASK= 0x70000000 > -var S_RESTORE_SPI_INIT_MTYPE_SHIFT= 28 > var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK= 0x04000000 > var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT= 26 > var S_WAVE_SIZE= 25 > @@ -326,12 +313,6 @@ L_SLEEP: > s_or_b32s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE > s_mov_b32s_save_buf_rsrc2, 0//NUM_RECORDS initial value = 0 (in bytes) although not neccessarily inited > s_mov_b32s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC > -s_and_b32s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_ATC_MASK > -s_lshr_b32s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) > -s_or_b32s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp//or ATC > -s_and_b32s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_MTYPE_MASK > -s_lshr_b32s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) > -s_or_b32s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp//or MTYPE > > s_mov_b32s_save_m0, m0 > > @@ -674,12 +655,7 @@ L_RESTORE: > s_or_b32s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE > s_mov_b32s_restore_buf_rsrc2, 0//NUM_RECORDS initial value = 0 (in bytes) > s_mov_b32s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC > -s_and_b32s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_ATC_MASK > -s_lshr_b32s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) > -s_or_b32s_restore_buf_rsrc3, 
s_restore_buf_rsrc3, s_restore_tmp//or ATC > -s_and_b32s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_MTYPE_MASK > -s_lshr_b32s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) > -s_or_b32s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp//or MTYPE > + > //determine it is wave32 or wave64 > get_wave_size(s_restore_size) > > @@ -971,8 +947,6 @@ L_RESTORE_HWREG: > s_lshl_b32s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT > s_or_b32s_restore_tmp, s_restore_tmp, s_restore_m0 > > -s_and_b32s_restore_m0, s_restore_status, SQ_WAVE_STATUS_INST_ATC_MASK > -s_lshr_b32s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT > s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_tmp > #endif > > diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm > index 75f29d13c90f..0008eb7d1ef4 100644 > --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm > +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm > @@ -31,8 +31,6 @@ var SINGLE_STEP_MISSED_WORKAROUND =1 //workaround for lost MODE.DEBUG_EN > /**************************************************************************/ > /*variables */ > /**************************************************************************/ > -var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23 > -var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000 > var SQ_WAVE_STATUS_SPI_PRIO_SHIFT = 1 > var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006 > var SQ_WAVE_STATUS_HALT_MASK = 0x2000 > @@ -70,9 +68,6 @@ var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG= 0x00007FFF//FIXME > > var SQ_WAVE_MODE_DEBUG_EN_MASK= 0x800 > > -var SQ_BUF_RSRC_WORD1_ATC_SHIFT =24 > -var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT =27 > - > var TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT= 26// bits [31:26] unused by SPI debug data > var TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK= 0xFC000000 > > @@ -80,10 +75,6 @@ var TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK= 0xFC000000 > var S_SAVE_BUF_RSRC_WORD1_STRIDE= 
0x00040000//stride is 4 bytes > var S_SAVE_BUF_RSRC_WORD3_MISC= 0x00807FAC//SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14] when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE > > -var S_SAVE_SPI_INIT_ATC_MASK= 0x08000000//bit[27]: ATC bit > -var S_SAVE_SPI_INIT_ATC_SHIFT= 27 > -var S_SAVE_SPI_INIT_MTYPE_MASK= 0x70000000//bit[30:28]: Mtype > -var S_SAVE_SPI_INIT_MTYPE_SHIFT= 28 > var S_SAVE_SPI_INIT_FIRST_WAVE_MASK= 0x04000000//bit[26]: FirstWaveInTG > var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT= 26 > > @@ -118,10 +109,6 @@ var s_save_ttmps_hi =s_save_trapsts//no conflict > var S_RESTORE_BUF_RSRC_WORD1_STRIDE =S_SAVE_BUF_RSRC_WORD1_STRIDE > var S_RESTORE_BUF_RSRC_WORD3_MISC =S_SAVE_BUF_RSRC_WORD3_MISC > > -var S_RESTORE_SPI_INIT_ATC_MASK =0x08000000 //bit[27]: ATC bit > -var S_RESTORE_SPI_INIT_ATC_SHIFT =27 > -var S_RESTORE_SPI_INIT_MTYPE_MASK =0x70000000 //bit[30:28]: Mtype > -var S_RESTORE_SPI_INIT_MTYPE_SHIFT =28 > var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK =0x04000000 //bit[26]: FirstWaveInTG > var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT =26 > > @@ -338,12 +325,6 @@ L_SAVE: > s_or_b32 s_save_buf_rsrc1,s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE > s_mov_b32 s_save_buf_rsrc2,0//NUM_RECORDS initial value = 0 (in bytes) although not neccessarily inited > s_mov_b32 s_save_buf_rsrc3,S_SAVE_BUF_RSRC_WORD3_MISC > - s_and_b32 s_save_tmp,s_save_spi_init_hi, S_SAVE_SPI_INIT_ATC_MASK > - s_lshr_b32 s_save_tmp,s_save_tmp, (S_SAVE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) //get ATC bit into position > - s_or_b32 s_save_buf_rsrc3,s_save_buf_rsrc3, s_save_tmp//or ATC > - s_and_b32 s_save_tmp,s_save_spi_init_hi, S_SAVE_SPI_INIT_MTYPE_MASK > - s_lshr_b32 s_save_tmp,s_save_tmp, (S_SAVE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) //get MTYPE bits into position > - s_or_b32 s_save_buf_rsrc3,s_save_buf_rsrc3, s_save_tmp//or MTYPE > > //FIXME right now s_save_m0/s_save_mem_offset use tma_lo/tma_hi (might need to save them before using them?) 
> s_mov_b32 s_save_m0,m0 //save M0 > @@ -673,12 +654,6 @@ L_RESTORE: > s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE > s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) > s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC > - s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_ATC_MASK > - s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) //get ATC bit into position > - s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or ATC > - s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_MTYPE_MASK > - s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) //get MTYPE bits into position > - s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or MTYPE > > /* global mem offset*/ > // s_mov_b32 s_restore_mem_offset, 0x0 //mem offset initial value = 0 > @@ -898,8 +873,6 @@ end > s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT > s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT > s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0 > - s_and_b32 s_restore_m0, s_restore_status, SQ_WAVE_STATUS_INST_ATC_MASK > - s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT > s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_tmp > > s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff//pc[47:32] //Do it here in order not to affect STATUS > @@ -909,8 +882,7 @@ end > > s_barrier//barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time > > -// s_rfe_b64 s_restore_pc_lo//Return to the main shader program and resume execution > - s_rfe_restore_b64 s_restore_pc_lo, s_restore_m0// s_restore_m0[0] is used to set STATUS.inst_atc > + s_rfe_b64 s_restore_pc_lo//Return to the main shader program and resume execution > > > 
/**************************************************************************/ _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx