Re: [PATCH] drm/amdkfd: Release the topology_lock in error case

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Am 2022-11-16 um 03:04 schrieb Ma Jun:
Release the topology_lock in error case

Signed-off-by: Ma Jun <Jun.Ma2@xxxxxxx>
Reported-by: Dan Carpenter <error27@xxxxxxxxx>
Dan, did you change your email address, is this one correct?

Ma Jun, thanks for looking into this. Some of this problem predates your patch that was flagged by Dan. I would prefer a more consistent and robust handling of these error cases. I think everything inside the topology lock could be moved into another function to simplify the error handling. I'm attaching a completely untested patch to illustrate the idea.

Regards,
  Felix


---
  drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index ef9c6fdfb88d..5ea737337658 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1841,6 +1841,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
  			pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n",
  			       gpu_id);
  			topology_crat_proximity_domain--;
+			up_write(&topology_lock);
  			return res;
  		}
@@ -1851,6 +1852,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
  			pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n",
  			       gpu_id);
  			topology_crat_proximity_domain--;
+			up_write(&topology_lock);
  			goto err;
  		}
@@ -1860,6 +1862,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
  		dev = kfd_assign_gpu(gpu);
  		if (WARN_ON(!dev)) {
  			res = -ENODEV;
+			up_write(&topology_lock);
  			goto err;
  		}
  
From ceb79972cdd490de181a6895836e40bf4e93c631 Mon Sep 17 00:00:00 2001
From: Felix Kuehling <felix.kuehling@xxxxxxxxx>
Date: Wed, 16 Nov 2022 15:38:44 -0500
Subject: [PATCH] drm/amdkfd: Release the topology_lock in error case

Move the topology-locked part of kfd_topology_add_device into a separate
function to simlpify error handling and release the topology lock
consistently.

Reported-by: Dan Carpenter <error27@xxxxxxxxx>
Signed-off-by: Felix Kuehling <felix.kuehling@xxxxxxxxx>
---
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 109 ++++++++++++----------
 1 file changed, 58 insertions(+), 51 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index ef9c6fdfb88d..b56beab71c5b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1805,16 +1805,66 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct
 	pr_debug("Added [%d] GPU cache entries\n", num_of_entries);
 }
 
+static int kfd_topology_add_device_locked(struct kfd_dev *gpu, uint32_t gpu_id,
+					  struct kfd_topology_device **dev)
+{
+	int proximity_domain = ++topology_crat_proximity_domain;
+	struct list_head temp_topology_device_list;
+	void *crat_image = NULL;
+	size_t image_size = 0;
+	int res;
+
+	res = kfd_create_crat_image_virtual(&crat_image, &image_size,
+					    COMPUTE_UNIT_GPU, gpu,
+					    proximity_domain);
+	if (res) {
+		pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n",
+		       gpu_id);
+		topology_crat_proximity_domain--;
+		return res;
+	}
+
+	res = kfd_parse_crat_table(crat_image,
+				   &temp_topology_device_list,
+				   proximity_domain);
+	if (res) {
+		pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n",
+		       gpu_id);
+		topology_crat_proximity_domain--;
+		return res;
+	}
+
+	kfd_topology_update_device_list(&temp_topology_device_list,
+					&topology_device_list);
+
+	*dev = kfd_assign_gpu(gpu);
+	if (WARN_ON(!*dev))
+		return -ENODEV;
+
+	/* Fill the cache affinity information here for the GPUs
+	 * using VCRAT
+	 */
+	kfd_fill_cache_non_crat_info(*dev, gpu);
+
+	/* Update the SYSFS tree, since we added another topology
+	 * device
+	 */
+	res = kfd_topology_update_sysfs();
+	if (!res)
+		sys_props.generation_count++;
+	else
+		pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n",
+		       gpu_id, res);
+
+	return res;
+}
+
 int kfd_topology_add_device(struct kfd_dev *gpu)
 {
 	uint32_t gpu_id;
 	struct kfd_topology_device *dev;
 	struct kfd_cu_info cu_info;
 	int res = 0;
-	struct list_head temp_topology_device_list;
-	void *crat_image = NULL;
-	size_t image_size = 0;
-	int proximity_domain;
 	int i;
 	const char *asic_name = amdgpu_asic_name[gpu->adev->asic_type];
 
@@ -1831,54 +1881,11 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
 	 */
 	down_write(&topology_lock);
 	dev = kfd_assign_gpu(gpu);
-	if (!dev) {
-		proximity_domain = ++topology_crat_proximity_domain;
-
-		res = kfd_create_crat_image_virtual(&crat_image, &image_size,
-						    COMPUTE_UNIT_GPU, gpu,
-						    proximity_domain);
-		if (res) {
-			pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n",
-			       gpu_id);
-			topology_crat_proximity_domain--;
-			return res;
-		}
-
-		res = kfd_parse_crat_table(crat_image,
-					   &temp_topology_device_list,
-					   proximity_domain);
-		if (res) {
-			pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n",
-			       gpu_id);
-			topology_crat_proximity_domain--;
-			goto err;
-		}
-
-		kfd_topology_update_device_list(&temp_topology_device_list,
-			&topology_device_list);
-
-		dev = kfd_assign_gpu(gpu);
-		if (WARN_ON(!dev)) {
-			res = -ENODEV;
-			goto err;
-		}
-
-		/* Fill the cache affinity information here for the GPUs
-		 * using VCRAT
-		 */
-		kfd_fill_cache_non_crat_info(dev, gpu);
-
-		/* Update the SYSFS tree, since we added another topology
-		 * device
-		 */
-		res = kfd_topology_update_sysfs();
-		if (!res)
-			sys_props.generation_count++;
-		else
-			pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n",
-						gpu_id, res);
-	}
+	if (!dev)
+		res = kfd_topology_add_device_locked(gpu, gpu_id, &dev);
 	up_write(&topology_lock);
+	if (res)
+		goto err;
 
 	dev->gpu_id = gpu_id;
 	gpu->id = gpu_id;
-- 
2.34.1


[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux