From: Nicolai Hähnle <nicolai.haehnle@xxxxxxx> Signed-off-by: Nicolai Hähnle <nicolai.haehnle at amd.com> --- src/app/print_waves.c | 40 +++++++++++++++++++++++++++++++++++++++- src/lib/read_gpr.c | 30 ++++++++++++++++++++++++++++++ src/umr.h | 1 + 3 files changed, 70 insertions(+), 1 deletion(-) diff --git a/src/app/print_waves.c b/src/app/print_waves.c index a9aaf39..a72d224 100644 --- a/src/app/print_waves.c +++ b/src/app/print_waves.c @@ -29,20 +29,22 @@ #define P(x) if (col++ == 4) { col = 1; printf("\n\t"); } printf("%20s: %8u | ", #x, (unsigned)ws.x); #define X(x) if (col++ == 4) { col = 1; printf("\n\t"); } printf("%20s: %08lx | ", #x, (unsigned long)ws.x); #define H(x) if (col) { printf("\n"); }; col = 0; printf("\n\n%s:\n\t", x); #define Hv(x, y) if (col) { printf("\n"); }; col = 0; printf("\n\n%s[%08lx]:\n\t", x, (unsigned long)y); void umr_print_waves(struct umr_asic *asic) { uint32_t x, se, sh, cu, simd, wave, sgprs[1024], shift, opcodes[8]; + uint32_t vgprs[64 * 256]; + uint32_t thread; uint64_t pgm_addr; struct umr_wave_status ws; int first = 1, col = 0; if (asic->options.halt_waves) umr_sq_cmd_halt_waves(asic, UMR_SQ_CMD_HALT); if (asic->family <= FAMILY_CIK) shift = 3; // on SI..CIK allocations were done in 8-dword blocks else @@ -50,24 +52,36 @@ void umr_print_waves(struct umr_asic *asic) for (se = 0; se < asic->config.gfx.max_shader_engines; se++) for (sh = 0; sh < asic->config.gfx.max_sh_per_se; sh++) for (cu = 0; cu < asic->config.gfx.max_cu_per_sh; cu++) { umr_get_wave_sq_info(asic, se, sh, cu, &ws); if (ws.sq_info.busy) { for (simd = 0; simd < 4; simd++) for (wave = 0; wave < 10; wave++) { //both simd/wave are hard coded at the moment... umr_get_wave_status(asic, se, sh, cu, simd, wave, &ws); if (ws.wave_status.halt || ws.wave_status.valid) { + unsigned have_vgprs = 0; + // grab sgprs.. 
- if (ws.wave_status.halt) + if (ws.wave_status.halt) { umr_read_sgprs(asic, &ws, &sgprs[0]); + if (options.bitfields) { + have_vgprs = 1; + for (thread = 0; thread < 64; ++thread) { + if (umr_read_vgprs(asic, &ws, thread, + &vgprs[256 * thread]) < 0) + have_vgprs = 0; + } + } + } + if (!options.bitfields && first) { first = 0; printf("SE SH CU SIMD WAVE# WAVE_STATUS PC_HI PC_LO INST_DW0 INST_DW1 EXEC_HI EXEC_LO HW_ID GPRALLOC LDSALLOC TRAPSTS IBSTS TBA_HI TBA_LO TMA_HI TMA_LO IB_DBG0 M0\n"); } if (!options.bitfields) { printf( "%u %u %u %u %u " // se/sh/cu/simd/wave "%08lx %08lx %08lx " // wave_status pc/hi/lo "%08lx %08lx %08lx %08lx " // inst0/1 exec hi/lo "%08lx %08lx %08lx %08lx %08lx " // HW_ID GPR/LDSALLOC TRAP/IB STS @@ -164,20 +178,44 @@ void umr_print_waves(struct umr_asic *asic) for (x = 0; x < ((ws.gpr_alloc.sgpr_size + 1) << shift); x += 4) printf("\t[%4u..%4u] = { %08lx, %08lx, %08lx, %08lx }\n", (unsigned)(x), (unsigned)(x + 3), (unsigned long)sgprs[x], (unsigned long)sgprs[x+1], (unsigned long)sgprs[x+2], (unsigned long)sgprs[x+3]); } + + if (have_vgprs) { + printf("\n"); + for (x = 0; x < ((ws.gpr_alloc.vgpr_size + 1) << 2); ++x) { + if (x % 16 == 0) { + if (x == 0) + printf("VGPRS: "); + else + printf(" "); + for (thread = 0; thread < 64; ++thread) { + unsigned live = thread < 32 ? (ws.exec_lo & (1u << thread)) + : (ws.exec_hi & (1u << (thread - 32))); + printf(live ? 
" t%02u " : " (t%02u) ", thread); + } + printf("\n"); + } + + printf(" [%3u] = {", x); + for (thread = 0; thread < 64; ++thread) + printf(" %08x", vgprs[thread * 256 + x]); + printf(" }\n"); + } + } + printf("\n\nPGM_MEM:\n"); pgm_addr = (((uint64_t)ws.pc_hi << 32) | ws.pc_lo) - (sizeof(opcodes)/2); umr_read_vram(asic, ws.hw_id.vm_id, pgm_addr, sizeof(opcodes), opcodes); for (x = 0; x < sizeof(opcodes)/4; x++) { if (x == (sizeof(opcodes)/8)) printf("*\t"); else printf("\t"); printf("pgm[%lu@%llx] = %08lx\n", (unsigned long)ws.hw_id.vm_id, diff --git a/src/lib/read_gpr.c b/src/lib/read_gpr.c index 427cfc5..669a49b 100644 --- a/src/lib/read_gpr.c +++ b/src/lib/read_gpr.c @@ -74,10 +74,40 @@ int umr_read_sgprs(struct umr_asic *asic, struct umr_wave_status *ws, uint32_t * lseek(asic->fd.gpr, addr, SEEK_SET); return read(asic->fd.gpr, dst, 4 * ((ws->gpr_alloc.sgpr_size + 1) << shift)); } else { umr_grbm_select_index(asic, ws->hw_id.se_id, ws->hw_id.sh_id, ws->hw_id.cu_id); wave_read_regs_via_mmio(asic, ws->hw_id.simd_id, ws->hw_id.wave_id, 0, 0, (ws->gpr_alloc.sgpr_size + 1) << shift, dst); umr_grbm_select_index(asic, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); return 0; } } + + +int umr_read_vgprs(struct umr_asic *asic, struct umr_wave_status *ws, uint32_t thread, uint32_t *dst) +{ + uint64_t addr; + + if (asic->family < FAMILY_AI) + return -1; + + if (!asic->options.no_kernel) { + addr = + (0ULL << 60) | // reading VGPRs + ((uint64_t)0) | // starting address to read from + ((uint64_t)ws->hw_id.se_id << 12) | + ((uint64_t)ws->hw_id.sh_id << 20) | + ((uint64_t)ws->hw_id.cu_id << 28) | + ((uint64_t)ws->hw_id.wave_id << 36) | + ((uint64_t)ws->hw_id.simd_id << 44) | + ((uint64_t)thread << 52); + + lseek(asic->fd.gpr, addr, SEEK_SET); + return read(asic->fd.gpr, dst, 4 * ((ws->gpr_alloc.vgpr_size + 1) << 2)); + } else { + umr_grbm_select_index(asic, ws->hw_id.se_id, ws->hw_id.sh_id, ws->hw_id.cu_id); + wave_read_regs_via_mmio(asic, ws->hw_id.simd_id, ws->hw_id.wave_id, thread, 
0x400, + (ws->gpr_alloc.vgpr_size + 1) << 2, dst); + umr_grbm_select_index(asic, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); + return 0; + } +} diff --git a/src/umr.h b/src/umr.h index e49c80c..3d2252e 100644 --- a/src/umr.h +++ b/src/umr.h @@ -494,20 +494,21 @@ void umr_free_asic(struct umr_asic *asic); void umr_free_maps(struct umr_asic *asic); void umr_close_asic(struct umr_asic *asic); // call this to close a fully open asic int umr_query_drm(struct umr_asic *asic, int field, void *ret, int size); void umr_enumerate_devices(void); int umr_update(struct umr_asic *asic, char *script); /* lib helpers */ int umr_get_wave_status(struct umr_asic *asic, unsigned se, unsigned sh, unsigned cu, unsigned simd, unsigned wave, struct umr_wave_status *ws); int umr_get_wave_sq_info(struct umr_asic *asic, unsigned se, unsigned sh, unsigned cu, struct umr_wave_status *ws); int umr_read_sgprs(struct umr_asic *asic, struct umr_wave_status *ws, uint32_t *dst); +int umr_read_vgprs(struct umr_asic *asic, struct umr_wave_status *ws, uint32_t thread, uint32_t *dst); int umr_read_sensor(struct umr_asic *asic, int sensor, void *dst, int *size); /* mmio helpers */ // init the mmio lookup table int umr_create_mmio_accel(struct umr_asic *asic); // find the word address of a register uint32_t umr_find_reg(struct umr_asic *asic, char *regname); // find the register data for a register -- 2.11.0