Re: [PATCH 2/3] drm/amdkfd: avoid HMM change cause circular lock dependency v2

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Am 04.02.19 um 19:23 schrieb Yang, Philip:
There is a circular lock dependency between the gfx and kfd paths with the HMM change:
lock(dqm) -> bo::reserve -> amdgpu_mn_lock

To avoid this, move init/uninit_mqd() out of lock(dqm), to remove nested
locking between mmap_sem and bo::reserve. The locking order
is: bo::reserve -> amdgpu_mn_lock(p->mn)

In general this sounds correct to me, but apart from that I don't know the code well enough to fully judge.


Change-Id: I2ec09a47571f6b4c8eaef93f22c0a600f5f70153
Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx>

Acked-by: Christian König <christian.koenig@xxxxxxx>

---
  .../drm/amd/amdkfd/kfd_device_queue_manager.c | 32 ++++++++++---------
  1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 8372556b52eb..efe0d3c0215b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1156,21 +1156,17 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
  	int retval;
  	struct mqd_manager *mqd_mgr;
-	retval = 0;
-
-	dqm_lock(dqm);
-
  	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
  		pr_warn("Can't create new usermode queue because %d queues were already created\n",
  				dqm->total_queue_count);
  		retval = -EPERM;
-		goto out_unlock;
+		goto out;
  	}
  	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
  		retval = allocate_sdma_queue(dqm, &q->sdma_id);
  		if (retval)
-			goto out_unlock;
+			goto out;
  		q->properties.sdma_queue_id =
  			q->sdma_id / get_num_sdma_engines(dqm);
  		q->properties.sdma_engine_id =
@@ -1181,6 +1177,9 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
  	if (retval)
  		goto out_deallocate_sdma_queue;
+	/* Do init_mqd before dqm_lock(dqm) to avoid circular locking order:
+	 * lock(dqm) -> bo::reserve
+	 */
  	mqd_mgr = dqm->ops.get_mqd_manager(dqm,
  			get_mqd_type_from_queue_type(q->properties.type));
@@ -1188,6 +1187,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
  		retval = -ENOMEM;
  		goto out_deallocate_doorbell;
  	}
+
  	/*
  	 * Eviction state logic: we only mark active queues as evicted
  	 * to avoid the overhead of restoring inactive queues later
@@ -1196,9 +1196,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
  		q->properties.is_evicted = (q->properties.queue_size > 0 &&
  					    q->properties.queue_percent > 0 &&
  					    q->properties.queue_address != 0);
-
  	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
-
  	q->properties.tba_addr = qpd->tba_addr;
  	q->properties.tma_addr = qpd->tma_addr;
  	retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
@@ -1206,6 +1204,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
  	if (retval)
  		goto out_deallocate_doorbell;
+	dqm_lock(dqm);
+
  	list_add(&q->list, &qpd->queues_list);
  	qpd->queue_count++;
  	if (q->properties.is_active) {
@@ -1233,9 +1233,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
  out_deallocate_sdma_queue:
  	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
  		deallocate_sdma_queue(dqm, q->sdma_id);
-out_unlock:
-	dqm_unlock(dqm);
-
+out:
  	return retval;
  }
@@ -1398,8 +1396,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
  			qpd->reset_wavefronts = true;
  	}
-	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
-
  	/*
  	 * Unconditionally decrement this counter, regardless of the queue's
  	 * type
@@ -1410,6 +1406,9 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
  	dqm_unlock(dqm);
+	/* Do uninit_mqd after dqm_unlock(dqm) to avoid circular locking */
+	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
+
  	return retval;
failed:
@@ -1631,7 +1630,11 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
  		qpd->reset_wavefronts = false;
  	}
-	/* lastly, free mqd resources */
+	dqm_unlock(dqm);
+
+	/* Lastly, free mqd resources.
+	 * Do uninit_mqd() after dqm_unlock to avoid circular locking.
+	 */
  	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
  		mqd_mgr = dqm->ops.get_mqd_manager(dqm,
  			get_mqd_type_from_queue_type(q->properties.type));
@@ -1645,7 +1648,6 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
  	}
out:
-	dqm_unlock(dqm);
  	return retval;
  }

_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux