From: Nicolai Hähnle <nicolai.haehnle@xxxxxxx> The hardware already adds the alloc base, so there is no need to do it in the tool. Signed-off-by: Nicolai Hähnle <nicolai.haehnle at amd.com> --- src/app/print_waves.c | 8 ++++---- src/lib/read_sgpr.c | 5 +++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/app/print_waves.c b/src/app/print_waves.c index 1efd8a1..a9aaf39 100644 --- a/src/app/print_waves.c +++ b/src/app/print_waves.c @@ -75,22 +75,22 @@ void umr_print_waves(struct umr_asic *asic) "\n", (unsigned)se, (unsigned)sh, (unsigned)cu, (unsigned)ws.hw_id.simd_id, (unsigned)ws.hw_id.wave_id, (unsigned long)ws.wave_status.value, (unsigned long)ws.pc_hi, (unsigned long)ws.pc_lo, (unsigned long)ws.wave_inst_dw0, (unsigned long)ws.wave_inst_dw1, (unsigned long)ws.exec_hi, (unsigned long)ws.exec_lo, (unsigned long)ws.hw_id.value, (unsigned long)ws.gpr_alloc.value, (unsigned long)ws.lds_alloc.value, (unsigned long)ws.trapsts.value, (unsigned long)ws.ib_sts.value, (unsigned long)ws.tba_hi, (unsigned long)ws.tba_lo, (unsigned long)ws.tma_hi, (unsigned long)ws.tma_lo, (unsigned long)ws.ib_dbg0, (unsigned long)ws.m0 ); if (ws.wave_status.halt) for (x = 0; x < ((ws.gpr_alloc.sgpr_size + 1) << shift); x += 4) printf(">SGPRS[%u..%u] = { %08lx, %08lx, %08lx, %08lx }\n", - (unsigned)((ws.gpr_alloc.sgpr_base << shift) + x), - (unsigned)((ws.gpr_alloc.sgpr_base << shift) + x + 3), + (unsigned)(x), + (unsigned)(x + 3), (unsigned long)sgprs[x], (unsigned long)sgprs[x+1], (unsigned long)sgprs[x+2], (unsigned long)sgprs[x+3]); pgm_addr = (((uint64_t)ws.pc_hi << 32) | ws.pc_lo) - (sizeof(opcodes)/2); umr_read_vram(asic, ws.hw_id.vm_id, pgm_addr, sizeof(opcodes), opcodes); for (x = 0; x < sizeof(opcodes)/4; x++) { printf(">pgm[%lu@%llx] = %08lx\n", (unsigned long)ws.hw_id.vm_id, @@ -156,22 +156,22 @@ void umr_print_waves(struct umr_asic *asic) Hv("GPR_ALLOC", ws.gpr_alloc.value); PP(gpr_alloc, vgpr_base); PP(gpr_alloc, vgpr_size); PP(gpr_alloc, sgpr_base); PP(gpr_alloc, 
sgpr_size); if (ws.wave_status.halt) { printf("\n\nSGPRS:\n"); for (x = 0; x < ((ws.gpr_alloc.sgpr_size + 1) << shift); x += 4) printf("\t[%4u..%4u] = { %08lx, %08lx, %08lx, %08lx }\n", - (unsigned)((ws.gpr_alloc.sgpr_base << shift) + x), - (unsigned)((ws.gpr_alloc.sgpr_base << shift) + x + 3), + (unsigned)(x), + (unsigned)(x + 3), (unsigned long)sgprs[x], (unsigned long)sgprs[x+1], (unsigned long)sgprs[x+2], (unsigned long)sgprs[x+3]); } printf("\n\nPGM_MEM:\n"); pgm_addr = (((uint64_t)ws.pc_hi << 32) | ws.pc_lo) - (sizeof(opcodes)/2); umr_read_vram(asic, ws.hw_id.vm_id, pgm_addr, sizeof(opcodes), opcodes); for (x = 0; x < sizeof(opcodes)/4; x++) { diff --git a/src/lib/read_sgpr.c b/src/lib/read_sgpr.c index cceb189..427cfc5 100644 --- a/src/lib/read_sgpr.c +++ b/src/lib/read_sgpr.c @@ -56,27 +56,28 @@ int umr_read_sgprs(struct umr_asic *asic, struct umr_wave_status *ws, uint32_t * uint64_t addr, shift; if (asic->family <= FAMILY_CIK) shift = 3; // on SI..CIK allocations were done in 8-dword blocks else shift = 4; // on VI allocations are in 16-dword blocks if (!asic->options.no_kernel) { addr = (1ULL << 60) | // reading SGPRs - ((uint64_t)ws->gpr_alloc.sgpr_base << shift) | // starting address to read from + ((uint64_t)0) | // starting address to read from ((uint64_t)ws->hw_id.se_id << 12) | ((uint64_t)ws->hw_id.sh_id << 20) | ((uint64_t)ws->hw_id.cu_id << 28) | ((uint64_t)ws->hw_id.wave_id << 36) | ((uint64_t)ws->hw_id.simd_id << 44) | (0ULL << 52); // thread_id lseek(asic->fd.gpr, addr, SEEK_SET); return read(asic->fd.gpr, dst, 4 * ((ws->gpr_alloc.sgpr_size + 1) << shift)); } else { umr_grbm_select_index(asic, ws->hw_id.se_id, ws->hw_id.sh_id, ws->hw_id.cu_id); - wave_read_regs_via_mmio(asic, ws->hw_id.simd_id, ws->hw_id.wave_id, ws->gpr_alloc.sgpr_base << shift, 0, (ws->gpr_alloc.sgpr_size + 1) << shift, dst); + wave_read_regs_via_mmio(asic, ws->hw_id.simd_id, ws->hw_id.wave_id, 0, 0, + (ws->gpr_alloc.sgpr_size + 1) << shift, dst); umr_grbm_select_index(asic, 
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); return 0; } } -- 2.11.0