On Wed, Jan 24, 2024 at 4:01 PM Jay Cornwall <jay.cornwall@xxxxxxx> wrote:
>
> On 1/15/2024 13:07, Jay Cornwall wrote:
> > This instruction has no functional difference to S_ENDPGM
> > but allows performance counters to track save events correctly.
> >
> > Signed-off-by: Jay Cornwall <jay.cornwall@xxxxxxx>

Acked-by: Alex Deucher <alexander.deucher@xxxxxxx>

> > ---
> >  drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h     | 14 +++++++-------
> >  .../gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm |  2 +-
> >  .../gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm  |  2 +-
> >  3 files changed, 9 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
> > index df75863393fc..d1caaf0e6a7c 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
> > +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
> > @@ -674,7 +674,7 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
> >         0x86ea6a6a, 0x8f6e837a,
> >         0xb96ee0c2, 0xbf800002,
> >         0xb97a0002, 0xbf8a0000,
> > -       0xbe801f6c, 0xbf810000,
> > +       0xbe801f6c, 0xbf9b0000,
> >  };
> >
> >  static const uint32_t cwsr_trap_nv1x_hex[] = {
> > @@ -1091,7 +1091,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
> >         0xb9eef807, 0x876dff6d,
> >         0x0000ffff, 0x87fe7e7e,
> >         0x87ea6a6a, 0xb9faf802,
> > -       0xbe80226c, 0xbf810000,
> > +       0xbe80226c, 0xbf9b0000,
> >         0xbf9f0000, 0xbf9f0000,
> >         0xbf9f0000, 0xbf9f0000,
> >         0xbf9f0000, 0x00000000,
> > @@ -1574,7 +1574,7 @@ static const uint32_t cwsr_trap_arcturus_hex[] = {
> >         0x86ea6a6a, 0x8f6e837a,
> >         0xb96ee0c2, 0xbf800002,
> >         0xb97a0002, 0xbf8a0000,
> > -       0xbe801f6c, 0xbf810000,
> > +       0xbe801f6c, 0xbf9b0000,
> >  };
> >
> >  static const uint32_t cwsr_trap_aldebaran_hex[] = {
> > @@ -2065,7 +2065,7 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = {
> >         0x86ea6a6a, 0x8f6e837a,
> >         0xb96ee0c2, 0xbf800002,
> >         0xb97a0002, 0xbf8a0000,
> > -       0xbe801f6c, 0xbf810000,
> > +       0xbe801f6c, 0xbf9b0000,
> >  };
> >
> >  static const uint32_t cwsr_trap_gfx10_hex[] = {
> > @@ -2500,7 +2500,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
> >         0x876dff6d, 0x0000ffff,
> >         0x87fe7e7e, 0x87ea6a6a,
> >         0xb9faf802, 0xbe80226c,
> > -       0xbf810000, 0xbf9f0000,
> > +       0xbf9b0000, 0xbf9f0000,
> >         0xbf9f0000, 0xbf9f0000,
> >         0xbf9f0000, 0xbf9f0000,
> >  };
> > @@ -2944,7 +2944,7 @@ static const uint32_t cwsr_trap_gfx11_hex[] = {
> >         0xb8eef802, 0xbf0d866e,
> >         0xbfa20002, 0xb97af802,
> >         0xbe80486c, 0xb97af802,
> > -       0xbe804a6c, 0xbfb00000,
> > +       0xbe804a6c, 0xbfb10000,
> >         0xbf9f0000, 0xbf9f0000,
> >         0xbf9f0000, 0xbf9f0000,
> >         0xbf9f0000, 0x00000000,
> > @@ -3436,5 +3436,5 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = {
> >         0x86ea6a6a, 0x8f6e837a,
> >         0xb96ee0c2, 0xbf800002,
> >         0xb97a0002, 0xbf8a0000,
> > -       0xbe801f6c, 0xbf810000,
> > +       0xbe801f6c, 0xbf9b0000,
> >  };
> > diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
> > index e0140df0b0ec..71b3dc0c7363 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
> > +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
> > @@ -1104,7 +1104,7 @@ L_RETURN_WITHOUT_PRIV:
> >         s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution
> >
> >  L_END_PGM:
> > -       s_endpgm
> > +       s_endpgm_saved
> >  end
> >
> >  function write_hwreg_to_mem(s, s_rsrc, s_mem_offset)
> > diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
> > index e506411ad28a..bb26338204f4 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
> > +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
> > @@ -921,7 +921,7 @@ L_RESTORE:
> >  /* the END */
> >  /**************************************************************************/
> >  L_END_PGM:
> > -       s_endpgm
> > +       s_endpgm_saved
> >
> >  end
> >
>
> Ping. Patch has been tested and verified, just looking for an Ack.