On gfx10+, every wave has 106 regular SGPRs followed immediately by VCC, meaning we should show 108 SGPRs by default. They are followed by 16 TTMPs, for 124 in total. Signed-off-by: Nicolai Hähnle <nicolai.haehnle@xxxxxxx> --- src/app/gui/commands.c | 16 ++++++++-------- src/app/print_waves.c | 4 ++-- src/lib/lowlevel/linux/read_gprwave.c | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/app/gui/commands.c b/src/app/gui/commands.c index 45bb9d4..b7b28a7 100644 --- a/src/app/gui/commands.c +++ b/src/app/gui/commands.c @@ -1626,29 +1626,29 @@ static void wave_to_json(struct umr_asic *asic, int is_halted, int include_shade json_object_set_value(json_object(wave), "hw_id", hw_id); JSON_Value *gpr_alloc = json_value_init_object(); json_object_set_number(json_object(gpr_alloc), "vgpr_base", wd->ws.gpr_alloc.vgpr_base); json_object_set_number(json_object(gpr_alloc), "vgpr_size", wd->ws.gpr_alloc.vgpr_size); json_object_set_number(json_object(gpr_alloc), "sgpr_base", wd->ws.gpr_alloc.sgpr_base); json_object_set_number(json_object(gpr_alloc), "sgpr_size", wd->ws.gpr_alloc.sgpr_size); json_object_set_value(json_object(wave), "gpr_alloc", gpr_alloc); if (is_halted && wd->ws.gpr_alloc.value != 0xbebebeef) { - int shift; - if (asic->family <= FAMILY_CIK || asic->family >= FAMILY_NV) - shift = 3; - else - shift = 4; - - int spgr_count = (wd->ws.gpr_alloc.sgpr_size + 1) << shift; + int sgpr_count; + if (asic->family <= FAMILY_AI) { + int shift = asic->family <= FAMILY_CIK ? 3 : 4; + sgpr_count = (wd->ws.gpr_alloc.sgpr_size + 1) << shift; + } else { + sgpr_count = 108; // regular SGPRs and VCC + } JSON_Value *sgpr = json_value_init_array(); - for (int x = 0; x < spgr_count; x++) { + for (int x = 0; x < sgpr_count; x++) { json_array_append_number(json_array(sgpr), wd->sgprs[x]); } json_object_set_value(json_object(wave), "sgpr", sgpr); JSON_Value *threads = json_value_init_array(); int num_threads = wd->num_threads; for (int thread = 0; thread < num_threads; thread++) { unsigned live = thread < 32 ? (wd->ws.exec_lo & (1u << thread)) : (wd->ws.exec_hi & (1u << (thread - 32))); json_array_append_boolean(json_array(threads), live ? 1 : 0); } diff --git a/src/app/print_waves.c b/src/app/print_waves.c index de93f93..04a4447 100644 --- a/src/app/print_waves.c +++ b/src/app/print_waves.c @@ -467,21 +467,21 @@ static void umr_print_waves_gfx_10_11(struct umr_asic *asic) (unsigned)wd->ws.hw_id1.wave_id, // TODO: wgp printed out won't match geometry for now w.r.t. to SPI (unsigned long)wd->ws.wave_status.value, (unsigned long)wd->ws.pc_hi, (unsigned long)wd->ws.pc_lo, (unsigned long)wd->ws.wave_inst_dw0, (unsigned long)wd->ws.exec_hi, (unsigned long)wd->ws.exec_lo, (unsigned long)wd->ws.hw_id1.value, (unsigned long)wd->ws.hw_id2.value, (unsigned long)wd->ws.gpr_alloc.value, (unsigned long)wd->ws.lds_alloc.value, (unsigned long)wd->ws.trapsts.value, (unsigned long)wd->ws.ib_sts.value, (unsigned long)wd->ws.ib_sts2.value, (unsigned long)wd->ws.ib_dbg1, (unsigned long)wd->ws.m0, (unsigned long)wd->ws.mode.value); } if (wd->ws.wave_status.halt || wd->ws.wave_status.fatal_halt) { - for (x = 0; x < 112; x += 4) + for (x = 0; x < 108; x += 4) printf(">SGPRS[%u..%u] = { %08lx, %08lx, %08lx, %08lx }\n", (unsigned)(x), (unsigned)(x + 3), (unsigned long)wd->sgprs[x], (unsigned long)wd->sgprs[x+1], (unsigned long)wd->sgprs[x+2], (unsigned long)wd->sgprs[x+3]); if (wd->ws.wave_status.trap_en || wd->ws.wave_status.priv) { @@ -567,21 +567,21 @@ static void umr_print_waves_gfx_10_11(struct umr_asic *asic) PP(hw_id2, vm_id); Hv("GPR_ALLOC", wd->ws.gpr_alloc.value); PP(gpr_alloc, vgpr_base); PP(gpr_alloc, vgpr_size); PP(gpr_alloc, sgpr_base); PP(gpr_alloc, sgpr_size); if (wd->ws.wave_status.halt || wd->ws.wave_status.fatal_halt) { printf("\n\nSGPRS:\n"); - for (x = 0; x < 112; x += 4) + for (x = 0; x < 108; x += 4) printf("\t[%4u..%4u] = { %08lx, %08lx, %08lx, %08lx }\n", (unsigned)(x), (unsigned)(x + 3), (unsigned long)wd->sgprs[x], (unsigned long)wd->sgprs[x+1], (unsigned long)wd->sgprs[x+2], (unsigned long)wd->sgprs[x+3]); if (wd->ws.wave_status.trap_en || wd->ws.wave_status.priv) { for (y = 0, x = 0x6C; x < (16 + 0x6C); x += 4, y += 4) { diff --git a/src/lib/lowlevel/linux/read_gprwave.c b/src/lib/lowlevel/linux/read_gprwave.c index e861ee4..6d68b7e 100644 --- a/src/lib/lowlevel/linux/read_gprwave.c +++ b/src/lib/lowlevel/linux/read_gprwave.c @@ -99,21 +99,21 @@ static int read_gpr_gprwave(struct umr_asic *asic, int v_or_s, uint32_t thread, id.gpr.thread = thread; size = 4 * ((ws->gpr_alloc.vgpr_size + 1) << asic->parameters.vgpr_granularity); } } else { id.se = ws->hw_id1.se_id; id.sh = ws->hw_id1.sa_id; id.cu = ((ws->hw_id1.wgp_id << 2) | ws->hw_id1.simd_id); id.wave = ws->hw_id1.wave_id; if (v_or_s == 0) { id.gpr.thread = 0; - size = 4 * 112; + size = 4 * 124; // regular SGPRs, VCC, and TTMPs } else { id.gpr.thread = thread; size = 4 * ((ws->gpr_alloc.vgpr_size + 1) << asic->parameters.vgpr_granularity); } } id.gpr.vpgr_or_sgpr = v_or_s; id.xcc_id = asic->options.vm_partition == -1 ? 0 : asic->options.vm_partition; r = ioctl(asic->fd.gprwave, AMDGPU_DEBUGFS_GPRWAVE_IOC_SET_STATE, &id); if (r) -- 2.40.0