Hi Mukul, See some comments inline ... Am 2020-06-23 um 1:18 p.m. schrieb Mukul Joshi: > Lockdep is spewing circular locking dependency warning when > reading SDMA usage stats. > > 150.887733] ====================================================== > [ 150.893903] WARNING: possible circular locking dependency detected > [ 150.905917] ------------------------------------------------------ > [ 150.912129] kfdtest/4081 is trying to acquire lock: > [ 150.917002] ffff8f7f3762e118 (&mm->mmap_sem#2){++++}, at: > __might_fault+0x3e/0x90 > [ 150.924490] > but task is already holding lock: > [ 150.930320] ffff8f7f49d229e8 (&dqm->lock_hidden){+.+.}, at: > destroy_queue_cpsch+0x29/0x210 [amdgpu] > [ 150.939432] > which lock already depends on the new lock. > [ 150.947603] > the existing dependency chain (in reverse order) is: > [ 150.955074] > -> #3 (&dqm->lock_hidden){+.+.}: > [ 150.960822] __mutex_lock+0xa1/0x9f0 > [ 150.964996] evict_process_queues_cpsch+0x22/0x120 [amdgpu] > [ 150.971155] kfd_process_evict_queues+0x3b/0xc0 [amdgpu] > [ 150.977054] kgd2kfd_quiesce_mm+0x25/0x60 [amdgpu] > [ 150.982442] amdgpu_amdkfd_evict_userptr+0x35/0x70 [amdgpu] > [ 150.988615] amdgpu_mn_invalidate_hsa+0x41/0x60 [amdgpu] > [ 150.994448] __mmu_notifier_invalidate_range_start+0xa4/0x240 > [ 151.000714] copy_page_range+0xd70/0xd80 > [ 151.005159] dup_mm+0x3ca/0x550 > [ 151.008816] copy_process+0x1bdc/0x1c70 > [ 151.013183] _do_fork+0x76/0x6c0 > [ 151.016929] __x64_sys_clone+0x8c/0xb0 > [ 151.021201] do_syscall_64+0x4a/0x1d0 > [ 151.025404] entry_SYSCALL_64_after_hwframe+0x49/0xbe > [ 151.030977] > -> #2 (&adev->notifier_lock){+.+.}: > [ 151.036993] __mutex_lock+0xa1/0x9f0 > [ 151.041168] amdgpu_mn_invalidate_hsa+0x30/0x60 [amdgpu] > [ 151.047019] __mmu_notifier_invalidate_range_start+0xa4/0x240 > [ 151.053277] copy_page_range+0xd70/0xd80 > [ 151.057722] dup_mm+0x3ca/0x550 > [ 151.061388] copy_process+0x1bdc/0x1c70 > [ 151.065748] _do_fork+0x76/0x6c0 > [ 151.069499] __x64_sys_clone+0x8c/0xb0 > 
[ 151.073765] do_syscall_64+0x4a/0x1d0 > [ 151.077952] entry_SYSCALL_64_after_hwframe+0x49/0xbe > [ 151.083523] > -> #1 (mmu_notifier_invalidate_range_start){+.+.}: > [ 151.090833] change_protection+0x802/0xab0 > [ 151.095448] mprotect_fixup+0x187/0x2d0 > [ 151.099801] setup_arg_pages+0x124/0x250 > [ 151.104251] load_elf_binary+0x3a4/0x1464 > [ 151.108781] search_binary_handler+0x6c/0x210 > [ 151.113656] __do_execve_file.isra.40+0x7f7/0xa50 > [ 151.118875] do_execve+0x21/0x30 > [ 151.122632] call_usermodehelper_exec_async+0x17e/0x190 > [ 151.128393] ret_from_fork+0x24/0x30 > [ 151.132489] > -> #0 (&mm->mmap_sem#2){++++}: > [ 151.138064] __lock_acquire+0x11a1/0x1490 > [ 151.142597] lock_acquire+0x90/0x180 > [ 151.146694] __might_fault+0x68/0x90 > [ 151.150879] read_sdma_queue_counter+0x5f/0xb0 [amdgpu] > [ 151.156693] update_sdma_queue_past_activity_stats+0x3b/0x90 [amdgpu] > [ 151.163725] destroy_queue_cpsch+0x1ae/0x210 [amdgpu] > [ 151.169373] pqm_destroy_queue+0xf0/0x250 [amdgpu] > [ 151.174762] kfd_ioctl_destroy_queue+0x32/0x70 [amdgpu] > [ 151.180577] kfd_ioctl+0x223/0x400 [amdgpu] > [ 151.185284] ksys_ioctl+0x8f/0xb0 > [ 151.189118] __x64_sys_ioctl+0x16/0x20 > [ 151.193389] do_syscall_64+0x4a/0x1d0 > [ 151.197569] entry_SYSCALL_64_after_hwframe+0x49/0xbe > [ 151.203141] > other info that might help us debug this: > > [ 151.211140] Chain exists of: > &mm->mmap_sem#2 --> &adev->notifier_lock --> &dqm->lock_hidden > > 151.222535] Possible unsafe locking scenario: > > [ 151.228447] CPU0 CPU1 > [ 151.232971] ---- ---- > [ 151.237502] lock(&dqm->lock_hidden); > [ 151.241254] lock(&adev->notifier_lock); > [ 151.247774] lock(&dqm->lock_hidden); > [ 151.254038] lock(&mm->mmap_sem#2); > [ 151.257610] > > This commit fixes the warning by ensuring get_user() is not called > while reading SDMA stats with dqm_lock held as get_user() could cause a > page fault which leads to the circular locking scenario. 
> > Signed-off-by: Mukul Joshi <mukul.joshi@xxxxxxx> > --- > .../drm/amd/amdkfd/kfd_device_queue_manager.c | 36 +++--- > .../drm/amd/amdkfd/kfd_device_queue_manager.h | 3 +- > drivers/gpu/drm/amd/amdkfd/kfd_process.c | 120 +++++++++++++++--- > 3 files changed, 124 insertions(+), 35 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > index 21eb0998c4ae..204612de3dbc 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > @@ -153,24 +153,23 @@ static void decrement_queue_count(struct device_queue_manager *dqm, > dqm->active_cp_queue_count--; > } > > -int read_sdma_queue_counter(struct queue *q, uint64_t *val) > +int read_sdma_queue_counter(uint64_t q_rptr, uint64_t *val) > { > int ret; > uint64_t tmp = 0; > > - if (!q || !val) > + if (!val) > return -EINVAL; > /* > * SDMA activity counter is stored at queue's RPTR + 0x8 location. 
> */ > - if (!access_ok((const void __user *)((uint64_t)q->properties.read_ptr + > + if (!access_ok((const void __user *)(q_rptr + > sizeof(uint64_t)), sizeof(uint64_t))) { > pr_err("Can't access sdma queue activity counter\n"); > return -EFAULT; > } > > - ret = get_user(tmp, (uint64_t *)((uint64_t)(q->properties.read_ptr) + > - sizeof(uint64_t))); > + ret = get_user(tmp, (uint64_t *)(q_rptr + sizeof(uint64_t))); > if (!ret) { > *val = tmp; > } > @@ -187,14 +186,15 @@ static int update_sdma_queue_past_activity_stats(struct kfd_process_device *pdd, > if (!pdd) > return -ENODEV; > > - ret = read_sdma_queue_counter(q, &val); > + ret = read_sdma_queue_counter((uint64_t)q->properties.read_ptr, &val); > if (ret) { > pr_err("Failed to read SDMA queue counter for queue: %d\n", > q->properties.queue_id); > return ret; > } > > - pdd->sdma_past_activity_counter += val; > + WRITE_ONCE(pdd->sdma_past_activity_counter, > + pdd->sdma_past_activity_counter + val); I guess you're trying to make this atomic, since you moved it outside the DQM lock. But this is not an atomic add: WRITE_ONCE only keeps the store itself from being torn, while the read-modify-write as a whole can still race with a concurrent update. To make it atomic you'll need to make sdma_past_activity_counter an atomic64_t and use atomic64_add for the update here, and atomic64_read for read access. But see my comment below about another possible race condition. 
> > return ret; > } > @@ -533,11 +533,6 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, > if (retval == -ETIME) > qpd->reset_wavefronts = true; > > - /* Get the SDMA queue stats */ > - if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || > - (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { > - update_sdma_queue_past_activity_stats(qpd_to_pdd(qpd), q); > - } > > mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); > > @@ -578,6 +573,12 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm, > retval = destroy_queue_nocpsch_locked(dqm, qpd, q); > dqm_unlock(dqm); > > + /* Get the SDMA queue stats */ > + if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || > + (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { > + update_sdma_queue_past_activity_stats(qpd_to_pdd(qpd), q); > + } > + > return retval; > } > > @@ -1520,11 +1521,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, > } > } > > - /* Get the SDMA queue stats */ > - if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || > - (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { > - update_sdma_queue_past_activity_stats(qpd_to_pdd(qpd), q); > - } > /* > * Unconditionally decrement this counter, regardless of the queue's > * type > @@ -1535,6 +1531,12 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, > > dqm_unlock(dqm); > > + /* Get the SDMA queue stats */ > + if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || > + (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { > + update_sdma_queue_past_activity_stats(qpd_to_pdd(qpd), q); > + } > + > /* Do free_mqd after dqm_unlock(dqm) to avoid circular locking */ > mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h > index 894bcf877f9e..49d8e324c636 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h > +++ 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h > @@ -251,6 +251,5 @@ static inline void dqm_unlock(struct device_queue_manager *dqm) > mutex_unlock(&dqm->lock_hidden); > } > > -int read_sdma_queue_counter(struct queue *q, uint64_t *val); > - > +int read_sdma_queue_counter(uint64_t q_rptr, uint64_t *val); > #endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */ > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c > index 8616a204e4c3..07286b535b20 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c > @@ -86,6 +86,13 @@ struct kfd_sdma_activity_handler_workarea { > uint64_t sdma_activity_counter; > }; > > +struct temp_sdma_queue_list { > + uint64_t rptr; > + uint64_t sdma_val; > + unsigned int queue_id; > + struct list_head list; > +}; > + > static void kfd_sdma_activity_worker(struct work_struct *work) > { > struct kfd_sdma_activity_handler_workarea *workarea; > @@ -96,6 +103,8 @@ static void kfd_sdma_activity_worker(struct work_struct *work) > struct qcm_process_device *qpd; > struct device_queue_manager *dqm; > int ret = 0; > + struct temp_sdma_queue_list sdma_q_list; > + struct temp_sdma_queue_list *sdma_q, *next; > > workarea = container_of(work, struct kfd_sdma_activity_handler_workarea, > sdma_activity_work); > @@ -109,7 +118,59 @@ static void kfd_sdma_activity_worker(struct work_struct *work) > qpd = &pdd->qpd; > if (!dqm || !qpd) > return; > + /* > + * Total SDMA activity is current SDMA activity + past SDMA activity > + * Past SDMA count is stored in pdd. > + * To get the current activity counters for all active SDMA queues, > + * we loop over all SDMA queues and get their counts from user-space. > + * > + * We cannot call get_user() with dqm_lock held as it can cause > + * a circular lock dependency situation. To read the SDMA stats, > + * we need to do the following: > + * > + * 1. 
Create a temporary list of SDMA queue nodes from the qpd->queues_list, > + * with dqm_lock/dqm_unlock(). > + * 2. Call get_user() for each node in temporary list without dqm_lock. > + * Save the SDMA count for each node and also add the count to the total > + * SDMA count counter. > + * Its possible, during this step, a few SDMA queue nodes got deleted > + * from the qpd->queues_list. > + * 3. Do a second pass over qpd->queues_list to check if any nodes got deleted. > + * If any node got deleted, its SDMA count would be captured in the sdma > + * past activity counter. There is still a small race condition here because sdma_past_activity_counter gets updated outside the DQM lock. So you can run into a situation where the queue has been removed, but sdma_past_activity_counter doesn't reflect that yet. You could fix that by calculating the update of sdma_past_activity_counter before you take the dqm_lock in the destroy_queue functions (destroy_queue_cpsch and destroy_queue_nocpsch), but applying the update inside the dqm_lock. > So subtract the SDMA counter stored in step 2 > + * for this node from the total SDMA count. > + */ > + INIT_LIST_HEAD(&sdma_q_list.list); > + > + /* > + * Create the temp list of all SDMA queues > + */ > + dqm_lock(dqm); > > + list_for_each_entry(q, &qpd->queues_list, list) { > + if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || > + (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { To reduce indentation below and improve readability, you could write: + if (q->properties.type != KFD_QUEUE_TYPE_SDMA && + q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI) + continue; > + sdma_q = kzalloc(sizeof(struct temp_sdma_queue_list), GFP_KERNEL); > + if (!sdma_q) > + return; Here you're leaking the sdma_q nodes already added to the temp list, and returning with the dqm_lock still held. 
> + > + INIT_LIST_HEAD(&sdma_q->list); > + sdma_q->rptr = (uint64_t)q->properties.read_ptr; > + sdma_q->queue_id = q->properties.queue_id; > + list_add_tail(&sdma_q->list, &sdma_q_list.list); > + } > + } > + > + dqm_unlock(dqm); > + > + if (list_empty(&sdma_q_list.list)) { > + workarea->sdma_activity_counter = READ_ONCE(pdd->sdma_past_activity_counter); > + return; Leaking memory. > + } > + > + /* > + * Get the usage count for each SDMA queue in temp_list. > + */ > mm = get_task_mm(pdd->process->lead_thread); > if (!mm) { > return; > @@ -117,33 +178,60 @@ static void kfd_sdma_activity_worker(struct work_struct *work) > > use_mm(mm); > > - dqm_lock(dqm); > + list_for_each_entry(sdma_q, &sdma_q_list.list, list) { > + val = 0; > + ret = read_sdma_queue_counter(sdma_q->rptr, &val); > + if (ret) > + pr_debug("Failed to read SDMA queue active " > + "counter for queue id: %d", > + sdma_q->queue_id); > + else { > + sdma_q->sdma_val = val; > + workarea->sdma_activity_counter += val; > + } > + } > > - /* > - * Total SDMA activity is current SDMA activity + past SDMA activity > - */ > - workarea->sdma_activity_counter = pdd->sdma_past_activity_counter; > + unuse_mm(mm); > + mmput(mm); > > /* > - * Get the current activity counters for all active SDMA queues > + * Do a second iteration over qpd_queues_list to check if any SDMA > + * nodes got deleted while fetching SDMA counter. > */ > + dqm_lock(dqm); > + > + workarea->sdma_activity_counter += READ_ONCE(pdd->sdma_past_activity_counter); > + > list_for_each_entry(q, &qpd->queues_list, list) { > + if (list_empty(&sdma_q_list.list)) { > + break; > + } > + > if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || > (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { Same as above about indentation. 
Regards, Felix > - val = 0; > - ret = read_sdma_queue_counter(q, &val); > - if (ret) > - pr_debug("Failed to read SDMA queue active " > - "counter for queue id: %d", > - q->properties.queue_id); > - else > - workarea->sdma_activity_counter += val; > + list_for_each_entry_safe(sdma_q, next, &sdma_q_list.list, list) { > + if (((uint64_t)q->properties.read_ptr == sdma_q->rptr) && > + (sdma_q->queue_id == q->properties.queue_id)) { > + list_del(&sdma_q->list); > + kfree(sdma_q); > + break; > + } > + } > } > } > > dqm_unlock(dqm); > - unuse_mm(mm); > - mmput(mm); > + > + /* > + * If temp list is not empty, it implies some queues got deleted > + * from qpd->queues_list during SDMA usage read. Subtract the SDMA > + * count for each node from the total SDMA count. > + */ > + list_for_each_entry_safe(sdma_q, next, &sdma_q_list.list, list) { > + workarea->sdma_activity_counter -= sdma_q->sdma_val; > + list_del(&sdma_q->list); > + kfree(sdma_q); > + } > } > > static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr, _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx