On Mon, Jun 6, 2022 at 5:04 PM Joseph Greathouse <Joseph.Greathouse@xxxxxxx> wrote: > > The MODE register contains detailed per-wave information, but UMR > skipped printing it. This patch adds the ability to print each wave's > MODE register as part of the wave scan operation, and prints the MODE > register's sub-fields as part of the deeper print option. > > Signed-off-by: Joseph Greathouse <Joseph.Greathouse@xxxxxxx> Acked-by: Alex Deucher <alexander.deucher@xxxxxxx> > --- > src/app/print_waves.c | 41 ++++++++++++++++++++++++++++++++++------ > src/lib/scan_waves.c | 44 +++++++++++++++++++++++++++++++++++++++++++ > src/umr.h | 19 +++++++++++++++++++ > 3 files changed, 98 insertions(+), 6 deletions(-) > > diff --git a/src/app/print_waves.c b/src/app/print_waves.c > index 07dae2d..daedc24 100644 > --- a/src/app/print_waves.c > +++ b/src/app/print_waves.c > @@ -91,7 +91,7 @@ static void umr_print_waves_si_ai(struct umr_asic *asic) > if (!asic->options.bitfields && first) { > static const char* titles[] = { > "WAVE_STATUS", "PC_HI", "PC_LO", "INST_DW0", "INST_DW1", "EXEC_HI", "EXEC_LO", "HW_ID", "GPRALLOC", > - "LDSALLOC", "TRAPSTS", "IBSTS", "TBA_HI", "TBA_LO", "TMA_HI", "TMA_LO", "IB_DBG0", "M0", NULL > + "LDSALLOC", "TRAPSTS", "IBSTS", "TBA_HI", "TBA_LO", "TMA_HI", "TMA_LO", "IB_DBG0", "M0", "MODE", NULL > }; > first = 0; > printf("SE SH CU SIMD WAVE# "); > @@ -106,13 +106,13 @@ static void umr_print_waves_si_ai(struct umr_asic *asic) > " %08lx %08lx %08lx " // wave_status pc/hi/lo > "%08lx %08lx %08lx %08lx " // inst0/1 exec hi/lo > "%08lx %08lx %08lx %08lx %08lx " // HW_ID GPR/LDSALLOC TRAP/IB STS > -"%08lx %08lx %08lx %08lx %08lx %08lx " // TBA_HI TBA_LO TMA_HI TMA_LO IB_DBG0 M0\n"); > +"%08lx %08lx %08lx %08lx %08lx %08lx %08lx " // TBA_HI TBA_LO TMA_HI TMA_LO IB_DBG0 M0 MODE\n"); > "\n", > (unsigned)wd->se, (unsigned)wd->sh, (unsigned)wd->cu, (unsigned)wd->ws.hw_id.simd_id, (unsigned)wd->ws.hw_id.wave_id, > (unsigned long)wd->ws.wave_status.value, (unsigned long)wd->ws.pc_hi, (unsigned long)wd->ws.pc_lo, > (unsigned long)wd->ws.wave_inst_dw0, (unsigned long)wd->ws.wave_inst_dw1, (unsigned long)wd->ws.exec_hi, (unsigned long)wd->ws.exec_lo, > (unsigned long)wd->ws.hw_id.value, (unsigned long)wd->ws.gpr_alloc.value, (unsigned long)wd->ws.lds_alloc.value, (unsigned long)wd->ws.trapsts.value, (unsigned long)wd->ws.ib_sts.value, > -(unsigned long)wd->ws.tba_hi, (unsigned long)wd->ws.tba_lo, (unsigned long)wd->ws.tma_hi, (unsigned long)wd->ws.tma_lo, (unsigned long)wd->ws.ib_dbg0, (unsigned long)wd->ws.m0 > +(unsigned long)wd->ws.tba_hi, (unsigned long)wd->ws.tba_lo, (unsigned long)wd->ws.tma_hi, (unsigned long)wd->ws.tma_lo, (unsigned long)wd->ws.ib_dbg0, (unsigned long)wd->ws.m0, (unsigned long)wd->ws.mode.value > ); > if (wd->ws.wave_status.halt || wd->ws.wave_status.fatal_halt) { > for (x = 0; x < ((wd->ws.gpr_alloc.sgpr_size + 1) << shift); x += 4) > @@ -318,6 +318,24 @@ static void umr_print_waves_si_ai(struct umr_asic *asic) > PP(trapsts, excp_cycle); > PP(trapsts, dp_rate); > > + Hv("MODE", wd->ws.mode.value); > + PP(mode, fp_round); > + PP(mode, fp_denorm); > + PP(mode, dx10_clamp); > + PP(mode, ieee); > + PP(mode, lod_clamped); > + PP(mode, debug_en); > + PP(mode, excp_en); > + if (asic->family > FAMILY_VI) > + PP(mode, fp16_ovfl); > + PP(mode, pops_packer0); > + PP(mode, pops_packer1); > + if (asic->family > FAMILY_VI) > + PP(mode, disable_perf); > + PP(mode, gpr_idx_en); > + PP(mode, vskip); > + PP(mode, csp); > + > printf("\n"); col = 0; > } > wd = wd->next; > @@ -393,7 +411,7 @@ static void umr_print_waves_nv(struct umr_asic *asic) > while (wd) { > if (!asic->options.bitfields && first) { > static const char* titles[] = { > - "WAVE_STATUS", "PC_HI", "PC_LO", "INST_DW0", "EXEC_HI", "EXEC_LO", "HW_ID1", "HW_ID2", "GPRALLOC", "LDSALLOC", "TRAPSTS", "IBSTS1", "IBSTS2", "IB_DBG1", "M0", NULL > + "WAVE_STATUS", "PC_HI", "PC_LO", "INST_DW0", "EXEC_HI", "EXEC_LO", "HW_ID1", "HW_ID2", "GPRALLOC", "LDSALLOC", "TRAPSTS", "IBSTS1", "IBSTS2", "IB_DBG1", "M0", "MODE", NULL > }; > first = 0; > printf("SE SA WGP SIMD WAVE# "); > @@ -408,13 +426,13 @@ static void umr_print_waves_nv(struct umr_asic *asic) > " %08lx %08lx %08lx " // wave_status pc/hi/lo > "%08lx %08lx %08lx " // inst0 exec hi/lo > "%08lx %08lx %08lx %08lx %08lx %08lx %08lx " // HW_ID1 HW_ID2 GPR/LDSALLOC TRAP/IB STS > -"%08lx %08lx " // IB_DBG1 M0\n"); > +"%08lx %08lx %08lx " // IB_DBG1 M0 MODE\n"); > "\n", > (unsigned)wd->se, (unsigned)wd->sh, (unsigned)wd->cu, (unsigned)wd->ws.hw_id1.simd_id, (unsigned)wd->ws.hw_id1.wave_id, // TODO: wgp printed out won't match geometry for now w.r.t. to SPI > (unsigned long)wd->ws.wave_status.value, (unsigned long)wd->ws.pc_hi, (unsigned long)wd->ws.pc_lo, > (unsigned long)wd->ws.wave_inst_dw0, (unsigned long)wd->ws.exec_hi, (unsigned long)wd->ws.exec_lo, > (unsigned long)wd->ws.hw_id1.value, (unsigned long)wd->ws.hw_id2.value, (unsigned long)wd->ws.gpr_alloc.value, (unsigned long)wd->ws.lds_alloc.value, (unsigned long)wd->ws.trapsts.value, > -(unsigned long)wd->ws.ib_sts.value, (unsigned long)wd->ws.ib_sts2.value, (unsigned long)wd->ws.ib_dbg1, (unsigned long)wd->ws.m0); > +(unsigned long)wd->ws.ib_sts.value, (unsigned long)wd->ws.ib_sts2.value, (unsigned long)wd->ws.ib_dbg1, (unsigned long)wd->ws.m0, (unsigned long)wd->ws.mode.value); > if (wd->ws.wave_status.halt || wd->ws.wave_status.fatal_halt) { > for (x = 0; x < 112; x += 4) > printf(">SGPRS[%u..%u] = { %08lx, %08lx, %08lx, %08lx }\n", > @@ -627,6 +645,17 @@ static void umr_print_waves_nv(struct umr_asic *asic) > PP(trapsts, excp_group_mask); > PP(trapsts, utc_error); > > + Hv("MODE", wd->ws.mode.value); > + PP(mode, fp_round); > + PP(mode, fp_denorm); > + PP(mode, dx10_clamp); > + PP(mode, ieee); > + PP(mode, lod_clamped); > + PP(mode, debug_en); > + PP(mode, excp_en); > + PP(mode, fp16_ovfl); > + PP(mode, disable_perf); > + > printf("\n"); col = 0; > } > wd = wd->next; > diff --git a/src/lib/scan_waves.c b/src/lib/scan_waves.c > index dae858e..71e595f 100644 > --- a/src/lib/scan_waves.c > +++ b/src/lib/scan_waves.c > @@ -139,6 +139,7 @@ int umr_read_wave_status_via_mmio_gfx8_9(struct umr_asic *asic, uint32_t simd, u > } > dst[(*no_fields)++] = wave_read_ind(asic, simd, wave, umr_find_reg_data(asic, "ixSQ_WAVE_IB_DBG0")->addr); > dst[(*no_fields)++] = wave_read_ind(asic, simd, wave, umr_find_reg_data(asic, "ixSQ_WAVE_M0")->addr); > + dst[(*no_fields)++] = wave_read_ind(asic, simd, wave, umr_find_reg_data(asic, "ixSQ_WAVE_MODE")->addr); > > return 0; > } > @@ -163,6 +164,7 @@ int umr_read_wave_status_via_mmio_gfx10(struct umr_asic *asic, uint32_t wave, ui > dst[(*no_fields)++] = wave_read_ind_nv(asic, wave, umr_find_reg_data(asic, "ixSQ_WAVE_IB_STS2")->addr); > dst[(*no_fields)++] = wave_read_ind_nv(asic, wave, umr_find_reg_data(asic, "ixSQ_WAVE_IB_DBG1")->addr); > dst[(*no_fields)++] = wave_read_ind_nv(asic, wave, umr_find_reg_data(asic, "ixSQ_WAVE_M0")->addr); > + dst[(*no_fields)++] = wave_read_ind_nv(asic, wave, umr_find_reg_data(asic, "ixSQ_WAVE_MODE")->addr); > > return 0; > } > @@ -258,6 +260,19 @@ static int umr_parse_wave_data_gfx_8(struct umr_asic *asic, struct umr_wave_stat > ws->tma_hi = buf[x++]; > ws->ib_dbg0 = buf[x++]; > ws->m0 = buf[x++]; > + > + ws->mode.value = value = buf[x++]; > + reg = umr_find_reg_data(asic, "ixSQ_WAVE_MODE"); > + ws->mode.fp_round = umr_bitslice_reg(asic, reg, "FP_ROUND", value); > + ws->mode.fp_denorm = umr_bitslice_reg(asic, reg, "FP_DENORM", value); > + ws->mode.dx10_clamp = umr_bitslice_reg(asic, reg, "DX10_CLAMP", value); > + ws->mode.ieee = umr_bitslice_reg(asic, reg, "IEEE", value); > + ws->mode.lod_clamped = umr_bitslice_reg(asic, reg, "LOD_CLAMPED", value); > + ws->mode.debug_en = umr_bitslice_reg(asic, reg, "DEBUG_EN", value); > + ws->mode.excp_en = umr_bitslice_reg(asic, reg, "EXCP_EN", value); > + ws->mode.gpr_idx_en = umr_bitslice_reg(asic, reg, "GPR_IDX_EN", value); > + ws->mode.vskip = umr_bitslice_reg(asic, reg, "VSKIP", value); > + ws->mode.csp = umr_bitslice_reg(asic, reg, "CSP", value); > return 0; > } > > @@ -348,6 +363,23 @@ static int umr_parse_wave_data_gfx_9(struct umr_asic *asic, struct umr_wave_stat > > ws->ib_dbg0 = buf[x++]; > ws->m0 = buf[x++]; > + > + ws->mode.value = value = buf[x++]; > + reg = umr_find_reg_data(asic, "ixSQ_WAVE_MODE"); > + ws->mode.fp_round = umr_bitslice_reg(asic, reg, "FP_ROUND", value); > + ws->mode.fp_denorm = umr_bitslice_reg(asic, reg, "FP_DENORM", value); > + ws->mode.dx10_clamp = umr_bitslice_reg(asic, reg, "DX10_CLAMP", value); > + ws->mode.ieee = umr_bitslice_reg(asic, reg, "IEEE", value); > + ws->mode.lod_clamped = umr_bitslice_reg(asic, reg, "LOD_CLAMPED", value); > + ws->mode.debug_en = umr_bitslice_reg(asic, reg, "DEBUG_EN", value); > + ws->mode.excp_en = umr_bitslice_reg(asic, reg, "EXCP_EN", value); > + ws->mode.fp16_ovfl = umr_bitslice_reg(asic, reg, "FP16_OVFL", value); > + ws->mode.pops_packer0 = umr_bitslice_reg(asic, reg, "POPS_PACKER0", value); > + ws->mode.pops_packer1 = umr_bitslice_reg(asic, reg, "POPS_PACKER1", value); > + ws->mode.disable_perf = umr_bitslice_reg(asic, reg, "DISABLE_PERF", value); > + ws->mode.gpr_idx_en = umr_bitslice_reg(asic, reg, "GPR_IDX_EN", value); > + ws->mode.vskip = umr_bitslice_reg(asic, reg, "VSKIP", value); > + ws->mode.csp = umr_bitslice_reg(asic, reg, "CSP", value); > return 0; > } > > @@ -468,6 +500,18 @@ static int umr_parse_wave_data_gfx_10(struct umr_asic *asic, struct umr_wave_sta > > ws->ib_dbg1 = buf[x++]; > ws->m0 = buf[x++]; > + > + ws->mode.value = value = buf[x++]; > + reg = umr_find_reg_data(asic, "ixSQ_WAVE_MODE"); > + ws->mode.fp_round = umr_bitslice_reg(asic, reg, "FP_ROUND", value); > + ws->mode.fp_denorm = umr_bitslice_reg(asic, reg, "FP_DENORM", value); > + ws->mode.dx10_clamp = umr_bitslice_reg(asic, reg, "DX10_CLAMP", value); > + ws->mode.ieee = umr_bitslice_reg(asic, reg, "IEEE", value); > + ws->mode.lod_clamped = umr_bitslice_reg(asic, reg, "LOD_CLAMPED", value); > + ws->mode.debug_en = umr_bitslice_reg(asic, reg, "DEBUG_EN", value); > + ws->mode.excp_en = umr_bitslice_reg(asic, reg, "EXCP_EN", value); > + ws->mode.fp16_ovfl = umr_bitslice_reg(asic, reg, "FP16_OVFL", value); > + ws->mode.disable_perf = umr_bitslice_reg(asic, reg, "DISABLE_PERF", value); > return 0; > } > > diff --git a/src/umr.h b/src/umr.h > index ca62560..f541dd3 100644 > --- a/src/umr.h > +++ b/src/umr.h > @@ -713,6 +713,25 @@ struct umr_wave_status { > excp_group_mask, > utc_error; > } trapsts; > + > + struct { > + uint32_t > + value, > + fp_round, > + fp_denorm, > + dx10_clamp, > + ieee, > + lod_clamped, > + debug_en, > + excp_en, > + fp16_ovfl, > + pops_packer0, > + pops_packer1, > + disable_perf, > + gpr_idx_en, > + vskip, > + csp; > + } mode; > }; > > struct umr_wave_data { > -- > 2.25.1 >