Re: [PATCHv2 2/4] drm/amdkfd: Update cache info reporting for GFX v9.4.3

On 2023-09-06 11:44, Mukul Joshi wrote:
Update cache info reporting in sysfs to report the correct
number of CUs and associated cache information based on
different spatial partitioning modes.

Signed-off-by: Mukul Joshi <mukul.joshi@xxxxxxx>
---
v1->v2:
- Revert the change in kfd_crat.c
- Add a comment to not change the value of CRAT_SIBLINGMAP_SIZE.

  drivers/gpu/drm/amd/amdkfd/kfd_crat.h     |  4 ++
  drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 82 +++++++++++++----------
  drivers/gpu/drm/amd/amdkfd/kfd_topology.h |  2 +-
  3 files changed, 51 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
index 387a8ef49385..74c2d7a0d628 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
@@ -79,6 +79,10 @@ struct crat_header {
  #define CRAT_SUBTYPE_IOLINK_AFFINITY		5
  #define CRAT_SUBTYPE_MAX			6
+/*
+ * Do not change the value of CRAT_SIBLINGMAP_SIZE from 32
+ * as it breaks the ABI.
+ */
  #define CRAT_SIBLINGMAP_SIZE	32
  /*
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index c54795682dfb..b98cc7930e4c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1596,14 +1596,17 @@ static int fill_in_l1_pcache(struct kfd_cache_properties **props_ext,
  static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
  				struct kfd_gpu_cache_info *pcache_info,
  				struct kfd_cu_info *cu_info,
-				int cache_type, unsigned int cu_processor_id)
+				int cache_type, unsigned int cu_processor_id,
+				struct kfd_node *knode)
  {
  	unsigned int cu_sibling_map_mask;
  	int first_active_cu;
-	int i, j, k;
+	int i, j, k, xcc, start, end;
  	struct kfd_cache_properties *pcache = NULL;
-	cu_sibling_map_mask = cu_info->cu_bitmap[0][0][0];
+	start = ffs(knode->xcc_mask) - 1;
+	end = start + NUM_XCC(knode->xcc_mask);
+	cu_sibling_map_mask = cu_info->cu_bitmap[start][0][0];
  	cu_sibling_map_mask &=
  		((1 << pcache_info[cache_type].num_cu_shared) - 1);
  	first_active_cu = ffs(cu_sibling_map_mask);
@@ -1638,16 +1641,18 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
  		cu_sibling_map_mask = cu_sibling_map_mask >> (first_active_cu - 1);
  		k = 0;
-		for (i = 0; i < cu_info->num_shader_engines; i++) {
-			for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {
-				pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF);
-				pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
-				pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
-				pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
-				k += 4;
-
-				cu_sibling_map_mask = cu_info->cu_bitmap[0][i % 4][j + i / 4];
-				cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1);
+		for (xcc = start; xcc < end; xcc++) {
+			for (i = 0; i < cu_info->num_shader_engines; i++) {
+				for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {
+					pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF);
+					pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
+					pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
+					pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
+					k += 4;
+
+					cu_sibling_map_mask = cu_info->cu_bitmap[start][i % 4][j + i / 4];

Shouldn't this use "xcc" as index instead of "start"?
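
Something like this, I think (just a sketch of the suggested one-line change, untested):

					cu_sibling_map_mask = cu_info->cu_bitmap[xcc][i % 4][j + i / 4];

That way each pass reads the bitmap of the XCC currently being iterated instead of always the first one in the mask.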

Other than that, this patch is

Reviewed-by: Felix Kuehling <Felix.Kuehling@xxxxxxx>

P.S.: This whole function looks suspicious to me in the way it exposes active/inactive CUs to user mode. I think user mode only wants active CUs counted in the sibling mask, because it has no other information about inactive CUs. And reading cu_info into cu_sibling_map_mask but only using it in the next loop iteration is probably also wrong: it throws away the cu_info from the last xcc/SE/SA. Or maybe I just don't understand what this function is trying to do.
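
For illustration, my reading of the loop boils down to this (a C-style pseudocode sketch with hypothetical helper names, not the actual code):

	/* mask is primed from the first partition before the loop */
	mask = cu_bitmap[start][0][0];
	for_each_xcc_se_sa(xcc, i, j) {
		emit_4_bytes(sibling_map, mask);         /* emits the mask read on the previous pass */
		mask = cu_bitmap[xcc][i % 4][j + i / 4]; /* read now, emitted (if ever) on the next pass */
	}

so whatever is read on the final pass is never emitted at all. Reading the mask at the top of the loop body, before emitting it, would avoid that, assuming that is what the function is meant to do.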


+					cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1);
+				}
  			}
  		}
  		pcache->sibling_map_size = k;
@@ -1665,7 +1670,7 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
  static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct kfd_node *kdev)
  {
  	struct kfd_gpu_cache_info *pcache_info = NULL;
-	int i, j, k;
+	int i, j, k, xcc, start, end;
  	int ct = 0;
  	unsigned int cu_processor_id;
  	int ret;
@@ -1699,37 +1704,42 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct
  	 *			then it will consider only one CU from
  	 *			the shared unit
  	 */
+	start = ffs(kdev->xcc_mask) - 1;
+	end = start + NUM_XCC(kdev->xcc_mask);
+
  	for (ct = 0; ct < num_of_cache_types; ct++) {
  		cu_processor_id = gpu_processor_id;
  		if (pcache_info[ct].cache_level == 1) {
-			for (i = 0; i < pcu_info->num_shader_engines; i++) {
-				for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) {
-					for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) {
-
-						ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info,
-									pcu_info->cu_bitmap[0][i % 4][j + i / 4], ct,
-									cu_processor_id, k);
-
-						if (ret < 0)
-							break;
-
-						if (!ret) {
-							num_of_entries++;
-							list_add_tail(&props_ext->list, &dev->cache_props);
+			for (xcc = start; xcc < end; xcc++) {
+				for (i = 0; i < pcu_info->num_shader_engines; i++) {
+					for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) {
+						for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) {
+
+							ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info,
+										pcu_info->cu_bitmap[xcc][i % 4][j + i / 4], ct,
+										cu_processor_id, k);
+
+							if (ret < 0)
+								break;
+
+							if (!ret) {
+								num_of_entries++;
+								list_add_tail(&props_ext->list, &dev->cache_props);
+							}
+
+							/* Move to next CU block */
+							num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
+								pcu_info->num_cu_per_sh) ?
+								pcache_info[ct].num_cu_shared :
+								(pcu_info->num_cu_per_sh - k);
+							cu_processor_id += num_cu_shared;
  						}
-
-						/* Move to next CU block */
-						num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
-							pcu_info->num_cu_per_sh) ?
-							pcache_info[ct].num_cu_shared :
-							(pcu_info->num_cu_per_sh - k);
-						cu_processor_id += num_cu_shared;
  					}
  				}
  			}
  		} else {
  			ret = fill_in_l2_l3_pcache(&props_ext, pcache_info,
-								pcu_info, ct, cu_processor_id);
+					pcu_info, ct, cu_processor_id, kdev);
  			if (ret < 0)
  				break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index dea32a9e5506..27386ce9a021 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -89,7 +89,7 @@ struct kfd_mem_properties {
  	struct attribute	attr;
  };
-#define CACHE_SIBLINGMAP_SIZE 64
+#define CACHE_SIBLINGMAP_SIZE 128
  struct kfd_cache_properties {
  	struct list_head	list;


