There is circular lock between gfx and kfd path with HMM change: lock(dqm) -> bo::reserve -> amdgpu_mn_lock To avoid this, move init/unint_mqd() out of lock(dqm), to remove nested locking between mmap_sem and bo::reserve. The locking order is: bo::reserve -> amdgpu_mn_lock(p->mn) Change-Id: I2ec09a47571f6b4c8eaef93f22c0a600f5f70153 Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx> --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 67 ++++++++++--------- 1 file changed, 36 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 8372556b52eb..fe120cc0930c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1158,6 +1158,33 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, retval = 0; + /* Do init_mqd before dqm_lock(dqm) to avoid circular locking order: + * lock(dqm) -> bo::reserve + */ + mqd_mgr = dqm->ops.get_mqd_manager(dqm, + get_mqd_type_from_queue_type(q->properties.type)); + + if (!mqd_mgr) { + retval = -ENOMEM; + goto out; + } + + /* + * Eviction state logic: we only mark active queues as evicted + * to avoid the overhead of restoring inactive queues later + */ + if (qpd->evicted) + q->properties.is_evicted = (q->properties.queue_size > 0 && + q->properties.queue_percent > 0 && + q->properties.queue_address != 0); + dqm->asic_ops.init_sdma_vm(dqm, q, qpd); + q->properties.tba_addr = qpd->tba_addr; + q->properties.tma_addr = qpd->tma_addr; + retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj, + &q->gart_mqd_addr, &q->properties); + if (retval) + goto out; + dqm_lock(dqm); if (dqm->total_queue_count >= max_num_of_queues_per_device) { @@ -1181,30 +1208,6 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, if (retval) goto out_deallocate_sdma_queue; - mqd_mgr = dqm->ops.get_mqd_manager(dqm, - get_mqd_type_from_queue_type(q->properties.type)); - - if (!mqd_mgr) { - retval = -ENOMEM; - goto out_deallocate_doorbell; - } - /* - * Eviction state logic: we only mark active queues as evicted - * to avoid the overhead of restoring inactive queues later - */ - if (qpd->evicted) - q->properties.is_evicted = (q->properties.queue_size > 0 && - q->properties.queue_percent > 0 && - q->properties.queue_address != 0); - - dqm->asic_ops.init_sdma_vm(dqm, q, qpd); - - q->properties.tba_addr = qpd->tba_addr; - q->properties.tma_addr = qpd->tma_addr; - retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj, - &q->gart_mqd_addr, &q->properties); - if (retval) - goto out_deallocate_doorbell; list_add(&q->list, &qpd->queues_list); qpd->queue_count++; @@ -1228,14 +1231,12 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, dqm_unlock(dqm); return retval; -out_deallocate_doorbell: - deallocate_doorbell(qpd, q); out_deallocate_sdma_queue: if (q->properties.type == KFD_QUEUE_TYPE_SDMA) deallocate_sdma_queue(dqm, q->sdma_id); out_unlock: dqm_unlock(dqm); - +out: return retval; } @@ -1398,8 +1399,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, qpd->reset_wavefronts = true; } - mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); - /* * Unconditionally decrement this counter, regardless of the queue's * type @@ -1410,6 +1409,9 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, dqm_unlock(dqm); + /* Do uninit_mqd after dqm_unlock(dqm) to avoid circular locking */ + mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); + return retval; failed: @@ -1631,7 +1633,11 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, qpd->reset_wavefronts = false; } - /* lastly, free mqd resources */ + dqm_unlock(dqm); + + /* Lastly, free mqd resources. + * Do uninit_mqd() after dqm_unlock to avoid circular locking. + */ list_for_each_entry_safe(q, next, &qpd->queues_list, list) { mqd_mgr = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); @@ -1645,7 +1651,6 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, } out: - dqm_unlock(dqm); return retval; } -- 2.17.1 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx