We overload "cu" to mean "wgp" in a bunch of places, but max_cu_per_sh
is always in terms of CUs.

Signed-off-by: Nicolai Hähnle <nicolai.haehnle@xxxxxxx>
---
 src/lib/scan_waves.c | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/src/lib/scan_waves.c b/src/lib/scan_waves.c
index 767520c..3279cc2 100644
--- a/src/lib/scan_waves.c
+++ b/src/lib/scan_waves.c
@@ -618,48 +618,50 @@ static int umr_scan_wave_simd(struct umr_asic *asic, uint32_t se, uint32_t sh, u
 	return 0;
 }
 
 /**
  * umr_scan_wave_data - Scan for any halted valid waves
  *
  * Returns NULL on error (or no waves found).
  */
 struct umr_wave_data *umr_scan_wave_data(struct umr_asic *asic)
 {
-	uint32_t se, sh, cu, simd;
+	uint32_t se, sh, simd;
 	struct umr_wave_data *ohead, *head, **ptail;
 	int r;
 
 	ohead = head = calloc(1, sizeof *head);
 	if (!head) {
 		asic->err_msg("[ERROR]: Out of memory\n");
 		return NULL;
 	}
 	ptail = &head;
 
 	for (se = 0; se < asic->config.gfx.max_shader_engines; se++)
-	for (sh = 0; sh < asic->config.gfx.max_sh_per_se; sh++)
-	for (cu = 0; cu < asic->config.gfx.max_cu_per_sh; cu++) {
+	for (sh = 0; sh < asic->config.gfx.max_sh_per_se; sh++) {
 		if (asic->family <= FAMILY_AI) {
-			asic->wave_funcs.get_wave_sq_info(asic, se, sh, cu, &(*ptail)->ws);
-			if ((*ptail)->ws.sq_info.busy) {
-				for (simd = 0; simd < 4; simd++) {
-					r = umr_scan_wave_simd(asic, se, sh, cu, simd, &ptail);
-					if (r < 0)
-						goto error;
+			for (uint32_t cu = 0; cu < asic->config.gfx.max_cu_per_sh; cu++) {
+				asic->wave_funcs.get_wave_sq_info(asic, se, sh, cu, &(*ptail)->ws);
+				if ((*ptail)->ws.sq_info.busy) {
+					for (simd = 0; simd < 4; simd++) {
+						r = umr_scan_wave_simd(asic, se, sh, cu, simd, &ptail);
+						if (r < 0)
+							goto error;
+					}
 				}
 			}
 		} else {
+			for (uint32_t wgp = 0; wgp < asic->config.gfx.max_cu_per_sh / 2; wgp++)
 			for (simd = 0; simd < 4; simd++) {
-				asic->wave_funcs.get_wave_sq_info(asic, se, sh, MANY_TO_INSTANCE(cu, simd), &(*ptail)->ws);
+				asic->wave_funcs.get_wave_sq_info(asic, se, sh, MANY_TO_INSTANCE(wgp, simd), &(*ptail)->ws);
 				if ((*ptail)->ws.sq_info.busy) {
-					r = umr_scan_wave_simd(asic, se, sh, cu, simd, &ptail);
+					r = umr_scan_wave_simd(asic, se, sh, wgp, simd, &ptail);
 					if (r < 0)
 						goto error;
 				}
 			}
 		}
 	}
 
 	// drop the pre-allocated tail node
 	free(*ptail);
 	*ptail = NULL;
-- 
2.40.0