Signed-off-by: Tom St Denis <tom.stdenis at amd.com> --- doc/sphinx/source/libwave_status.rst | 28 +++ src/app/print_waves.c | 379 +++++++++++++++++------------------ src/lib/CMakeLists.txt | 1 + src/lib/scan_waves.c | 97 +++++++++ src/umr.h | 9 + 5 files changed, 314 insertions(+), 200 deletions(-) create mode 100644 src/lib/scan_waves.c diff --git a/doc/sphinx/source/libwave_status.rst b/doc/sphinx/source/libwave_status.rst index 0f58a3c91855..2e16a4ac0cb6 100644 --- a/doc/sphinx/source/libwave_status.rst +++ b/doc/sphinx/source/libwave_status.rst @@ -43,6 +43,34 @@ can be read with the following function: This will populate many of the fields of the structure 'umr_wave_status'. An example of reading them can be found in src/app/print_waves.c. +--------------------- +Scanning Halted Waves +--------------------- + +If the waves have been halted (say with the function umr_sq_cmd_halt_waves()) then +a list of halted valid waves can be made with the following function: + + +:: + + struct umr_wave_data *umr_scan_wave_data(struct umr_asic *asic) + +This will return NULL on error (or no halted waves) or a pointer +to the following structure: + +:: + + struct umr_wave_data { + uint32_t vgprs[64 * 256], sgprs[1024]; + int se, sh, cu, simd, wave, have_vgprs; + struct umr_wave_status ws; + struct umr_wave_thread *threads; + struct umr_wave_data *next; + }; + +The list of waves is stored as a linked list terminated by the +last node, whose 'next' pointer is NULL.
+ ------------ Reading GPRs ------------ diff --git a/src/app/print_waves.c b/src/app/print_waves.c index 563fc65bedb7..d901bc902ff3 100644 --- a/src/app/print_waves.c +++ b/src/app/print_waves.c @@ -24,11 +24,11 @@ */ #include "umrapp.h" -#define PP(x, y) if (col++ == 4) { col = 1; printf("\n\t"); } printf("%20s: %8u | ", #y, (unsigned)ws.x.y); -#define PX(x, y) if (col++ == 4) { col = 1; printf("\n\t"); } printf("%20s: %08lx | ", #y, (unsigned long)ws.x.y); +#define PP(x, y) if (col++ == 4) { col = 1; printf("\n\t"); } printf("%20s: %8u | ", #y, (unsigned)wd->ws.x.y); +#define PX(x, y) if (col++ == 4) { col = 1; printf("\n\t"); } printf("%20s: %08lx | ", #y, (unsigned long)wd->ws.x.y); -#define P(x) if (col++ == 4) { col = 1; printf("\n\t"); } printf("%20s: %8u | ", #x, (unsigned)ws.x); -#define X(x) if (col++ == 4) { col = 1; printf("\n\t"); } printf("%20s: %08lx | ", #x, (unsigned long)ws.x); +#define P(x) if (col++ == 4) { col = 1; printf("\n\t"); } printf("%20s: %8u | ", #x, (unsigned)wd->ws.x); +#define X(x) if (col++ == 4) { col = 1; printf("\n\t"); } printf("%20s: %08lx | ", #x, (unsigned long)wd->ws.x); #define H(x) if (col) { printf("\n"); }; col = 0; printf("\n\n%s:\n\t", x); #define Hv(x, y) if (col) { printf("\n"); }; col = 0; printf("\n\n%s[%08lx]:\n\t", x, (unsigned long)y); @@ -37,11 +37,9 @@ void umr_print_waves(struct umr_asic *asic) { - uint32_t x, y, se, sh, cu, simd, wave, sgprs[1024], shift; - uint32_t vgprs[64 * 256]; - uint32_t thread; + uint32_t x, y, shift, thread; uint64_t pgm_addr; - struct umr_wave_status ws; + struct umr_wave_data *wd, *owd; int first = 1, col = 0; if (asic->options.halt_waves) @@ -52,228 +50,209 @@ void umr_print_waves(struct umr_asic *asic) else shift = 4; // on VI allocations are in 16-dword blocks - for (se = 0; se < asic->config.gfx.max_shader_engines; se++) - for (sh = 0; sh < asic->config.gfx.max_sh_per_se; sh++) - for (cu = 0; cu < asic->config.gfx.max_cu_per_sh; cu++) { - umr_get_wave_sq_info(asic, se, sh, 
cu, &ws); - if (ws.sq_info.busy) { - for (simd = 0; simd < 4; simd++) - for (wave = 0; wave < 10; wave++) { //both simd/wave are hard coded at the moment... - umr_get_wave_status(asic, se, sh, cu, simd, wave, &ws); - if (ws.wave_status.halt || ws.wave_status.valid) { - unsigned have_vgprs = 0; - - // grab sgprs.. - if (ws.wave_status.halt) { - umr_read_sgprs(asic, &ws, &sgprs[0]); - - if (asic->options.bitfields) { - have_vgprs = 1; - for (thread = 0; thread < 64; ++thread) { - if (umr_read_vgprs(asic, &ws, thread, - &vgprs[256 * thread]) < 0) - have_vgprs = 0; - } - } - } - - if (!asic->options.bitfields && first) { - first = 0; - printf("SE SH CU SIMD WAVE# WAVE_STATUS PC_HI PC_LO INST_DW0 INST_DW1 EXEC_HI EXEC_LO HW_ID GPRALLOC LDSALLOC TRAPSTS IBSTS TBA_HI TBA_LO TMA_HI TMA_LO IB_DBG0 M0\n"); - } - if (!asic->options.bitfields) { - printf( + owd = wd = umr_scan_wave_data(asic); + while (wd) { + if (!asic->options.bitfields && first) { + first = 0; + printf("SE SH CU SIMD WAVE# WAVE_STATUS PC_HI PC_LO INST_DW0 INST_DW1 EXEC_HI EXEC_LO HW_ID GPRALLOC LDSALLOC TRAPSTS IBSTS TBA_HI TBA_LO TMA_HI TMA_LO IB_DBG0 M0\n"); + } + if (!asic->options.bitfields) { + printf( "%u %u %u %u %u " // se/sh/cu/simd/wave "%08lx %08lx %08lx " // wave_status pc/hi/lo "%08lx %08lx %08lx %08lx " // inst0/1 exec hi/lo "%08lx %08lx %08lx %08lx %08lx " // HW_ID GPR/LDSALLOC TRAP/IB STS "%08lx %08lx %08lx %08lx %08lx %08lx " // TBA_HI TBA_LO TMA_HI TMA_LO IB_DBG0 M0\n"); "\n", -(unsigned)se, (unsigned)sh, (unsigned)cu, (unsigned)ws.hw_id.simd_id, (unsigned)ws.hw_id.wave_id, -(unsigned long)ws.wave_status.value, (unsigned long)ws.pc_hi, (unsigned long)ws.pc_lo, -(unsigned long)ws.wave_inst_dw0, (unsigned long)ws.wave_inst_dw1, (unsigned long)ws.exec_hi, (unsigned long)ws.exec_lo, -(unsigned long)ws.hw_id.value, (unsigned long)ws.gpr_alloc.value, (unsigned long)ws.lds_alloc.value, (unsigned long)ws.trapsts.value, (unsigned long)ws.ib_sts.value, -(unsigned long)ws.tba_hi, (unsigned 
long)ws.tba_lo, (unsigned long)ws.tma_hi, (unsigned long)ws.tma_lo, (unsigned long)ws.ib_dbg0, (unsigned long)ws.m0 +(unsigned)wd->se, (unsigned)wd->sh, (unsigned)wd->cu, (unsigned)wd->ws.hw_id.simd_id, (unsigned)wd->ws.hw_id.wave_id, +(unsigned long)wd->ws.wave_status.value, (unsigned long)wd->ws.pc_hi, (unsigned long)wd->ws.pc_lo, +(unsigned long)wd->ws.wave_inst_dw0, (unsigned long)wd->ws.wave_inst_dw1, (unsigned long)wd->ws.exec_hi, (unsigned long)wd->ws.exec_lo, +(unsigned long)wd->ws.hw_id.value, (unsigned long)wd->ws.gpr_alloc.value, (unsigned long)wd->ws.lds_alloc.value, (unsigned long)wd->ws.trapsts.value, (unsigned long)wd->ws.ib_sts.value, +(unsigned long)wd->ws.tba_hi, (unsigned long)wd->ws.tba_lo, (unsigned long)wd->ws.tma_hi, (unsigned long)wd->ws.tma_lo, (unsigned long)wd->ws.ib_dbg0, (unsigned long)wd->ws.m0 ); - if (ws.wave_status.halt) { - for (x = 0; x < ((ws.gpr_alloc.sgpr_size + 1) << shift); x += 4) - printf(">SGPRS[%u..%u] = { %08lx, %08lx, %08lx, %08lx }\n", - (unsigned)(x), - (unsigned)(x + 3), - (unsigned long)sgprs[x], - (unsigned long)sgprs[x+1], - (unsigned long)sgprs[x+2], - (unsigned long)sgprs[x+3]); - - if (ws.wave_status.trap_en || ws.wave_status.priv) { - for (y = 0, x = 0x6C; x < (16 + 0x6C); x += 4) { - printf(">%s[%u..%u] = { %08lx, %08lx, %08lx, %08lx }\n", - (x < (0x6C + 4) && asic->family <= FAMILY_VI) ? 
"TBA/TMA" : "TTMP", - (unsigned)(y), - (unsigned)(y + 3), - (unsigned long)sgprs[x], - (unsigned long)sgprs[x+1], - (unsigned long)sgprs[x+2], - (unsigned long)sgprs[x+3]); + if (wd->ws.wave_status.halt) { + for (x = 0; x < ((wd->ws.gpr_alloc.sgpr_size + 1) << shift); x += 4) + printf(">SGPRS[%u..%u] = { %08lx, %08lx, %08lx, %08lx }\n", + (unsigned)(x), + (unsigned)(x + 3), + (unsigned long)wd->sgprs[x], + (unsigned long)wd->sgprs[x+1], + (unsigned long)wd->sgprs[x+2], + (unsigned long)wd->sgprs[x+3]); - // restart numbering on SI..VI with TTMP0 - y += 4; - if (x == 0x6C && asic->family <= FAMILY_VI) - y = 0; - } - } - } + if (wd->ws.wave_status.trap_en || wd->ws.wave_status.priv) { + for (y = 0, x = 0x6C; x < (16 + 0x6C); x += 4) { + printf(">%s[%u..%u] = { %08lx, %08lx, %08lx, %08lx }\n", + (x < (0x6C + 4) && asic->family <= FAMILY_VI) ? "TBA/TMA" : "TTMP", + (unsigned)(y), + (unsigned)(y + 3), + (unsigned long)wd->sgprs[x], + (unsigned long)wd->sgprs[x+1], + (unsigned long)wd->sgprs[x+2], + (unsigned long)wd->sgprs[x+3]); - pgm_addr = (((uint64_t)ws.pc_hi << 32) | ws.pc_lo) - (NUM_OPCODE_WORDS*4)/2; - umr_vm_disasm(asic, ws.hw_id.vm_id, pgm_addr, (((uint64_t)ws.pc_hi << 32) | ws.pc_lo), NUM_OPCODE_WORDS*4); - } else { - first = 0; - printf("\n------------------------------------------------------\nse%u.sh%u.cu%u.simd%u.wave%u\n", - (unsigned)se, (unsigned)sh, (unsigned)cu, (unsigned)ws.hw_id.simd_id, (unsigned)ws.hw_id.wave_id); + // restart numbering on SI..VI with TTMP0 + y += 4; + if (x == 0x6C && asic->family <= FAMILY_VI) + y = 0; + } + } + } - H("Main Registers"); - X(pc_hi); - X(pc_lo); - X(wave_inst_dw0); - X(wave_inst_dw1); - X(exec_hi); - X(exec_lo); - X(tba_hi); - X(tba_lo); - X(tma_hi); - X(tma_lo); - X(m0); - X(ib_dbg0); + pgm_addr = (((uint64_t)wd->ws.pc_hi << 32) | wd->ws.pc_lo) - (NUM_OPCODE_WORDS*4)/2; + umr_vm_disasm(asic, wd->ws.hw_id.vm_id, pgm_addr, (((uint64_t)wd->ws.pc_hi << 32) | wd->ws.pc_lo), NUM_OPCODE_WORDS*4); + } else { + first = 0; 
+ printf("\n------------------------------------------------------\nse%u.sh%u.cu%u.simd%u.wave%u\n", + (unsigned)wd->se, (unsigned)wd->sh, (unsigned)wd->cu, (unsigned)wd->ws.hw_id.simd_id, (unsigned)wd->ws.hw_id.wave_id); - Hv("Wave_Status", ws.wave_status.value); - PP(wave_status, scc); - PP(wave_status, execz); - PP(wave_status, vccz); - PP(wave_status, in_tg); - PP(wave_status, halt); - PP(wave_status, valid); - PP(wave_status, spi_prio); - PP(wave_status, wave_prio); - PP(wave_status, priv); - PP(wave_status, trap_en); - PP(wave_status, trap); - PP(wave_status, ttrace_en); - PP(wave_status, export_rdy); - PP(wave_status, in_barrier); - PP(wave_status, ecc_err); - PP(wave_status, skip_export); - PP(wave_status, perf_en); - PP(wave_status, cond_dbg_user); - PP(wave_status, cond_dbg_sys); - PP(wave_status, data_atc); - PP(wave_status, inst_atc); - PP(wave_status, dispatch_cache_ctrl); - PP(wave_status, must_export); + H("Main Registers"); + X(pc_hi); + X(pc_lo); + X(wave_inst_dw0); + X(wave_inst_dw1); + X(exec_hi); + X(exec_lo); + X(tba_hi); + X(tba_lo); + X(tma_hi); + X(tma_lo); + X(m0); + X(ib_dbg0); - Hv("HW_ID", ws.hw_id.value); - PP(hw_id, wave_id); - PP(hw_id, simd_id); - PP(hw_id, pipe_id); - PP(hw_id, cu_id); - PP(hw_id, sh_id); - PP(hw_id, se_id); - PP(hw_id, tg_id); - PP(hw_id, vm_id); - PP(hw_id, queue_id); - PP(hw_id, state_id); - PP(hw_id, me_id); + Hv("Wave_Status", wd->ws.wave_status.value); + PP(wave_status, scc); + PP(wave_status, execz); + PP(wave_status, vccz); + PP(wave_status, in_tg); + PP(wave_status, halt); + PP(wave_status, valid); + PP(wave_status, spi_prio); + PP(wave_status, wave_prio); + PP(wave_status, priv); + PP(wave_status, trap_en); + PP(wave_status, trap); + PP(wave_status, ttrace_en); + PP(wave_status, export_rdy); + PP(wave_status, in_barrier); + PP(wave_status, ecc_err); + PP(wave_status, skip_export); + PP(wave_status, perf_en); + PP(wave_status, cond_dbg_user); + PP(wave_status, cond_dbg_sys); + PP(wave_status, data_atc); + 
PP(wave_status, inst_atc); + PP(wave_status, dispatch_cache_ctrl); + PP(wave_status, must_export); - Hv("GPR_ALLOC", ws.gpr_alloc.value); - PP(gpr_alloc, vgpr_base); - PP(gpr_alloc, vgpr_size); - PP(gpr_alloc, sgpr_base); - PP(gpr_alloc, sgpr_size); + Hv("HW_ID", wd->ws.hw_id.value); + PP(hw_id, wave_id); + PP(hw_id, simd_id); + PP(hw_id, pipe_id); + PP(hw_id, cu_id); + PP(hw_id, sh_id); + PP(hw_id, se_id); + PP(hw_id, tg_id); + PP(hw_id, vm_id); + PP(hw_id, queue_id); + PP(hw_id, state_id); + PP(hw_id, me_id); - if (ws.wave_status.halt) { - printf("\n\nSGPRS:\n"); - for (x = 0; x < ((ws.gpr_alloc.sgpr_size + 1) << shift); x += 4) - printf("\t[%4u..%4u] = { %08lx, %08lx, %08lx, %08lx }\n", - (unsigned)(x), - (unsigned)(x + 3), - (unsigned long)sgprs[x], - (unsigned long)sgprs[x+1], - (unsigned long)sgprs[x+2], - (unsigned long)sgprs[x+3]); + Hv("GPR_ALLOC", wd->ws.gpr_alloc.value); + PP(gpr_alloc, vgpr_base); + PP(gpr_alloc, vgpr_size); + PP(gpr_alloc, sgpr_base); + PP(gpr_alloc, sgpr_size); - if (ws.wave_status.trap_en || ws.wave_status.priv) { - for (y = 0, x = 0x6C; x < (16 + 0x6C); x += 4) { - // only print label once each - if ((asic->family <= FAMILY_VI && x < 0x6C + 8) || - (asic->family > FAMILY_VI && x < 0x6C + 4)) - printf("\n%s:\n", (x < 0x6C + 4 && asic->family <= FAMILY_VI) ? 
"TBA/TMA" : "TTMP"); - printf("\t[%4u..%4u] = { %08lx, %08lx, %08lx, %08lx }\n", - (unsigned)(y), - (unsigned)(y + 3), - (unsigned long)sgprs[x], - (unsigned long)sgprs[x+1], - (unsigned long)sgprs[x+2], - (unsigned long)sgprs[x+3]); + if (wd->ws.wave_status.halt) { + printf("\n\nSGPRS:\n"); + for (x = 0; x < ((wd->ws.gpr_alloc.sgpr_size + 1) << shift); x += 4) + printf("\t[%4u..%4u] = { %08lx, %08lx, %08lx, %08lx }\n", + (unsigned)(x), + (unsigned)(x + 3), + (unsigned long)wd->sgprs[x], + (unsigned long)wd->sgprs[x+1], + (unsigned long)wd->sgprs[x+2], + (unsigned long)wd->sgprs[x+3]); - // reset count on SI..VI - y += 4; - if (x == 0x6C && asic->family <= FAMILY_VI) - y = 0; - } - } - } + if (wd->ws.wave_status.trap_en || wd->ws.wave_status.priv) { + for (y = 0, x = 0x6C; x < (16 + 0x6C); x += 4) { + // only print label once each + if ((asic->family <= FAMILY_VI && x < 0x6C + 8) || + (asic->family > FAMILY_VI && x < 0x6C + 4)) + printf("\n%s:\n", (x < 0x6C + 4 && asic->family <= FAMILY_VI) ? "TBA/TMA" : "TTMP"); + printf("\t[%4u..%4u] = { %08lx, %08lx, %08lx, %08lx }\n", + (unsigned)(y), + (unsigned)(y + 3), + (unsigned long)wd->sgprs[x], + (unsigned long)wd->sgprs[x+1], + (unsigned long)wd->sgprs[x+2], + (unsigned long)wd->sgprs[x+3]); - if (have_vgprs) { - printf("\n"); - for (x = 0; x < ((ws.gpr_alloc.vgpr_size + 1) << 2); ++x) { - if (x % 16 == 0) { - if (x == 0) - printf("VGPRS: "); - else - printf(" "); - for (thread = 0; thread < 64; ++thread) { - unsigned live = thread < 32 ? (ws.exec_lo & (1u << thread)) - : (ws.exec_hi & (1u << (thread - 32))); - printf(live ? 
" t%02u " : " (t%02u) ", thread); - } - printf("\n"); - } + // reset count on SI..VI + y += 4; + if (x == 0x6C && asic->family <= FAMILY_VI) + y = 0; + } + } + } - printf(" [%3u] = {", x); - for (thread = 0; thread < 64; ++thread) - printf(" %08x", vgprs[thread * 256 + x]); - printf(" }\n"); - } + if (wd->have_vgprs) { + printf("\n"); + for (x = 0; x < ((wd->ws.gpr_alloc.vgpr_size + 1) << 2); ++x) { + if (x % 16 == 0) { + if (x == 0) + printf("VGPRS: "); + else + printf(" "); + for (thread = 0; thread < 64; ++thread) { + unsigned live = thread < 32 ? (wd->ws.exec_lo & (1u << thread)) + : (wd->ws.exec_hi & (1u << (thread - 32))); + printf(live ? " t%02u " : " (t%02u) ", thread); } + printf("\n"); + } - printf("\n\nPGM_MEM:\n"); - pgm_addr = (((uint64_t)ws.pc_hi << 32) | ws.pc_lo) - (NUM_OPCODE_WORDS*4)/2; - umr_vm_disasm(asic, ws.hw_id.vm_id, pgm_addr, (((uint64_t)ws.pc_hi << 32) | ws.pc_lo), NUM_OPCODE_WORDS*4); + printf(" [%3u] = {", x); + for (thread = 0; thread < 64; ++thread) + printf(" %08x", wd->vgprs[thread * 256 + x]); + printf(" }\n"); + } + } - Hv("LDS_ALLOC", ws.lds_alloc.value); - PP(lds_alloc, lds_base); - PP(lds_alloc, lds_size); + printf("\n\nPGM_MEM:\n"); + pgm_addr = (((uint64_t)wd->ws.pc_hi << 32) | wd->ws.pc_lo) - (NUM_OPCODE_WORDS*4)/2; + umr_vm_disasm(asic, wd->ws.hw_id.vm_id, pgm_addr, (((uint64_t)wd->ws.pc_hi << 32) | wd->ws.pc_lo), NUM_OPCODE_WORDS*4); - Hv("IB_STS", ws.ib_sts.value); - PP(ib_sts, vm_cnt); - PP(ib_sts, exp_cnt); - PP(ib_sts, lgkm_cnt); - PP(ib_sts, valu_cnt); + Hv("LDS_ALLOC", wd->ws.lds_alloc.value); + PP(lds_alloc, lds_base); + PP(lds_alloc, lds_size); - Hv("TRAPSTS", ws.trapsts.value); - PP(trapsts, excp); - PP(trapsts, excp_cycle); - PP(trapsts, dp_rate); + Hv("IB_STS", wd->ws.ib_sts.value); + PP(ib_sts, vm_cnt); + PP(ib_sts, exp_cnt); + PP(ib_sts, lgkm_cnt); + PP(ib_sts, valu_cnt); - printf("\n"); col = 0; - } + Hv("TRAPSTS", wd->ws.trapsts.value); + PP(trapsts, excp); + PP(trapsts, excp_cycle); + PP(trapsts, dp_rate); 
- } - } + printf("\n"); col = 0; } + wd = wd->next; } if (first) printf("No active waves!\n"); + wd = owd; + while (wd) { + owd = wd->next; + free(wd); + wd = owd; + } + if (asic->options.halt_waves) umr_sq_cmd_halt_waves(asic, UMR_SQ_CMD_RESUME); } diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt index c028c550fa3e..8d5427d63aae 100644 --- a/src/lib/CMakeLists.txt +++ b/src/lib/CMakeLists.txt @@ -21,6 +21,7 @@ add_library(umrcore STATIC read_vram.c ring_decode.c scan_config.c + scan_waves.c sq_cmd_halt_waves.c transfer_soc15.c wave_status.c diff --git a/src/lib/scan_waves.c b/src/lib/scan_waves.c new file mode 100644 index 000000000000..e3e7e131c888 --- /dev/null +++ b/src/lib/scan_waves.c @@ -0,0 +1,97 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: Tom St Denis <tom.stdenis at amd.com> + * + */ +#include "umr.h" + +struct umr_wave_data *umr_scan_wave_data(struct umr_asic *asic) +{ + uint32_t se, sh, cu, simd, wave, thread; + struct umr_wave_data *opwd, *ppwd, *pwd; + + ppwd = opwd = pwd = calloc(1, sizeof *pwd); + if (!pwd) { + fprintf(stderr, "[ERROR]: Out of memory\n"); + return NULL; + } + + for (se = 0; se < asic->config.gfx.max_shader_engines; se++) + for (sh = 0; sh < asic->config.gfx.max_sh_per_se; sh++) + for (cu = 0; cu < asic->config.gfx.max_cu_per_sh; cu++) { + // ensure the wave data is zeroed out if it was forwarded + // from a previous iteration + memset(&pwd->ws, 0, sizeof(pwd->ws)); + + pwd->se = se; + pwd->sh = sh; + pwd->cu = cu; + umr_get_wave_sq_info(asic, se, sh, cu, &pwd->ws); + if (pwd->ws.sq_info.busy) { + for (simd = 0; simd < 4; simd++) + for (wave = 0; wave < 10; wave++) { //both simd/wave are hard coded at the moment... + pwd->simd = simd; + pwd->wave = wave; + umr_get_wave_status(asic, se, sh, cu, simd, wave, &pwd->ws); + if (pwd->ws.wave_status.halt || pwd->ws.wave_status.valid) { + // grab sgprs.. 
+ if (pwd->ws.wave_status.halt) { + umr_read_sgprs(asic, &pwd->ws, &pwd->sgprs[0]); + + pwd->have_vgprs = 1; + for (thread = 0; thread < 64; ++thread) { + if (umr_read_vgprs(asic, &pwd->ws, thread, + &pwd->vgprs[256 * thread]) < 0) { + pwd->have_vgprs = 0; + break; + } + } + } + + pwd->next = calloc(1, sizeof(*pwd)); + if (!pwd->next) { + fprintf(stderr, "[ERROR]: Out of memory\n"); + return opwd; + } + pwd->next->se = pwd->se; + pwd->next->sh = pwd->sh; + pwd->next->cu = pwd->cu; + pwd->next->ws = pwd->ws; + ppwd = pwd; + pwd = pwd->next; + } + } + } + } + + // no waves to capture + if (opwd == pwd) { + free(pwd); + return NULL; + } + + // drop tail node + free(ppwd->next); + ppwd->next = NULL; + + return opwd; +} diff --git a/src/umr.h b/src/umr.h index 7154db7bb2c3..e99ee965527e 100644 --- a/src/umr.h +++ b/src/umr.h @@ -362,6 +362,14 @@ struct umr_wave_status { } trapsts; }; +struct umr_wave_data { + uint32_t vgprs[64 * 256], sgprs[1024]; + int se, sh, cu, simd, wave, have_vgprs; + struct umr_wave_status ws; + struct umr_wave_thread *threads; + struct umr_wave_data *next; +}; + struct umr_shaders_pgm { // VMID and length in bytes uint32_t @@ -561,6 +569,7 @@ int umr_update(struct umr_asic *asic, char *script); /* lib helpers */ int umr_get_wave_status(struct umr_asic *asic, unsigned se, unsigned sh, unsigned cu, unsigned simd, unsigned wave, struct umr_wave_status *ws); +struct umr_wave_data *umr_scan_wave_data(struct umr_asic *asic); int umr_get_wave_sq_info(struct umr_asic *asic, unsigned se, unsigned sh, unsigned cu, struct umr_wave_status *ws); int umr_read_sgprs(struct umr_asic *asic, struct umr_wave_status *ws, uint32_t *dst); int umr_read_vgprs(struct umr_asic *asic, struct umr_wave_status *ws, uint32_t thread, uint32_t *dst); -- 2.14.3