Instruction modifiers of the untyped vector memory buffer instructions
(MUBUF encoded) changed in gfx940. The slc, scc and glc modifiers have
been replaced with sc0, sc1 and nt.
The current CWSR trap handler is written using pre-gfx940 modifier
names, making the source incompatible with a strict gfx940 assembler.
This patch updates the cwsr_trap_handler_gfx9.s source file to be
compatible with all gfx9 variants of the ISA. The binary assembled code
is unchanged (so the behaviour is unchanged as well), only the source
representation is updated.
Signed-off-by: Lancelot SIX <lancelot.six@xxxxxxx>
---
.../drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm | 24 ++++++++++++-------
1 file changed, 15 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
index bb26338204f4..a2d597d7fb57 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
@@ -48,6 +48,12 @@ var ACK_SQC_STORE = 1
//workaround for suspected SQC store bug causing
var SAVE_AFTER_XNACK_ERROR = 1 //workaround for TCP store
failure after XNACK error when ALLOW_REPLAY=0, for debugger
var SINGLE_STEP_MISSED_WORKAROUND = (ASIC_FAMILY <=
CHIP_ALDEBARAN) //workaround for lost MODE.DEBUG_EN exception when
SAVECTX raised
+#if ASIC_FAMILY < CHIP_GC_9_4_3
+#define VMEM_MODIFIERS slc:1 glc:1
+#else
+#define VMEM_MODIFIERS sc0:1 nt:1
+#endif
+
/**************************************************************************/
/* variables */
/**************************************************************************/
@@ -581,7 +587,7 @@ end
L_SAVE_LDS_LOOP_VECTOR:
ds_read_b64 v[0:1], v2 //x =LDS[a], byte address
s_waitcnt lgkmcnt(0)
- buffer_store_dwordx2 v[0:1], v2, s_save_buf_rsrc0,
s_save_mem_offset offen:1 glc:1 slc:1
+ buffer_store_dwordx2 v[0:1], v2, s_save_buf_rsrc0,
s_save_mem_offset VMEM_MODIFIERS offen:1
// s_waitcnt vmcnt(0)
// v_add_u32 v2, vcc[0:1], v2, v3
v_add_u32 v2, v2, v3
@@ -979,17 +985,17 @@ L_TCP_STORE_CHECK_DONE:
end
function write_4vgprs_to_mem(s_rsrc, s_mem_offset)
- buffer_store_dword v0, v0, s_rsrc, s_mem_offset slc:1 glc:1
- buffer_store_dword v1, v0, s_rsrc, s_mem_offset slc:1 glc:1
offset:256
- buffer_store_dword v2, v0, s_rsrc, s_mem_offset slc:1 glc:1
offset:256*2
- buffer_store_dword v3, v0, s_rsrc, s_mem_offset slc:1 glc:1
offset:256*3
+ buffer_store_dword v0, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS
+ buffer_store_dword v1, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS
offset:256
+ buffer_store_dword v2, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS
offset:256*2
+ buffer_store_dword v3, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS
offset:256*3
end
function read_4vgprs_from_mem(s_rsrc, s_mem_offset)
- buffer_load_dword v0, v0, s_rsrc, s_mem_offset slc:1 glc:1
- buffer_load_dword v1, v0, s_rsrc, s_mem_offset slc:1 glc:1
offset:256
- buffer_load_dword v2, v0, s_rsrc, s_mem_offset slc:1 glc:1
offset:256*2
- buffer_load_dword v3, v0, s_rsrc, s_mem_offset slc:1 glc:1
offset:256*3
+ buffer_load_dword v0, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS
+ buffer_load_dword v1, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS
offset:256
+ buffer_load_dword v2, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS
offset:256*2
+ buffer_load_dword v3, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS
offset:256*3
s_waitcnt vmcnt(0)
end
base-commit: cf743996352e327f483dc7d66606c90276f57380