[PATCH umr 04/17] gfx10+: iterate only over existing WGPs when scanning waves

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



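We overload "cu" to mean "wgp" in a bunch of places, but max_cu_per_sh
is always in terms of CUs. Each WGP is made up of two CUs, so on gfx10+
only iterate over max_cu_per_sh / 2 WGPs when scanning for waves instead
of also probing WGP indices that do not exist.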

Signed-off-by: Nicolai Hähnle <nicolai.haehnle@xxxxxxx>
---
 src/lib/scan_waves.c | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/src/lib/scan_waves.c b/src/lib/scan_waves.c
index 767520c..3279cc2 100644
--- a/src/lib/scan_waves.c
+++ b/src/lib/scan_waves.c
@@ -618,48 +618,50 @@ static int umr_scan_wave_simd(struct umr_asic *asic, uint32_t se, uint32_t sh, u
 	return 0;
 }
 
 /**
  * umr_scan_wave_data - Scan for any halted valid waves
  *
  * Returns NULL on error (or no waves found).
  */
 struct umr_wave_data *umr_scan_wave_data(struct umr_asic *asic)
 {
-	uint32_t se, sh, cu, simd;
+	uint32_t se, sh, simd;
 	struct umr_wave_data *ohead, *head, **ptail;
 	int r;
 
 	ohead = head = calloc(1, sizeof *head);
 	if (!head) {
 		asic->err_msg("[ERROR]: Out of memory\n");
 		return NULL;
 	}
 	ptail = &head;
 
 	for (se = 0; se < asic->config.gfx.max_shader_engines; se++)
-	for (sh = 0; sh < asic->config.gfx.max_sh_per_se; sh++)
-	for (cu = 0; cu < asic->config.gfx.max_cu_per_sh; cu++) {
+	for (sh = 0; sh < asic->config.gfx.max_sh_per_se; sh++) {
 		if (asic->family <= FAMILY_AI) {
-			asic->wave_funcs.get_wave_sq_info(asic, se, sh, cu, &(*ptail)->ws);
-			if ((*ptail)->ws.sq_info.busy) {
-				for (simd = 0; simd < 4; simd++) {
-					r = umr_scan_wave_simd(asic, se, sh, cu, simd, &ptail);
-					if (r < 0)
-						goto error;
+			for (uint32_t cu = 0; cu < asic->config.gfx.max_cu_per_sh; cu++) {
+				asic->wave_funcs.get_wave_sq_info(asic, se, sh, cu, &(*ptail)->ws);
+				if ((*ptail)->ws.sq_info.busy) {
+					for (simd = 0; simd < 4; simd++) {
+						r = umr_scan_wave_simd(asic, se, sh, cu, simd, &ptail);
+						if (r < 0)
+							goto error;
+					}
 				}
 			}
 		} else {
+			for (uint32_t wgp = 0; wgp < asic->config.gfx.max_cu_per_sh / 2; wgp++)
 			for (simd = 0; simd < 4; simd++) {
-				asic->wave_funcs.get_wave_sq_info(asic, se, sh, MANY_TO_INSTANCE(cu, simd), &(*ptail)->ws);
+				asic->wave_funcs.get_wave_sq_info(asic, se, sh, MANY_TO_INSTANCE(wgp, simd), &(*ptail)->ws);
 				if ((*ptail)->ws.sq_info.busy) {
-					r = umr_scan_wave_simd(asic, se, sh, cu, simd, &ptail);
+					r = umr_scan_wave_simd(asic, se, sh, wgp, simd, &ptail);
 					if (r < 0)
 						goto error;
 				}
 			}
 		}
 	}
 
 	// drop the pre-allocated tail node
 	free(*ptail);
 	*ptail = NULL;
-- 
2.40.0




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux