[AMD Official Use Only] sudo ./kfdtest --gtest_filter=KFDSVM* sudo ./kfdtest Test results are pass. Tested-by: Ruili Ji <ruili.ji@xxxxxxx> -----Original Message----- From: Yang, Philip <Philip.Yang@xxxxxxx> Sent: 2022年1月20日 0:23 To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx Cc: Kuehling, Felix <Felix.Kuehling@xxxxxxx>; Ji, Ruili <Ruili.Ji@xxxxxxx>; Yang, Philip <Philip.Yang@xxxxxxx> Subject: [PATCH 1/2] drm/amdkfd: svm deferred_list work continue cleanup after mm gone After mm is removed from task->mm, deferred_list work should continue to handle deferred_range_list which maybe split to child range to avoid child range leak, and remove ranges mmu interval notifier to avoid mm mm_count leak, but skip updating notifier and inserting new notifier. Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx> Reported-by: Ruili Ji <ruili.ji@xxxxxxx> Tested-by: Ruili Ji <ruili.ji@xxxxxxx> --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 41 ++++++++++++++++------------ 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index f2805ba74c80..9ec195e1ef23 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1985,10 +1985,9 @@ svm_range_update_notifier_and_interval_tree(struct mm_struct *mm, } static void -svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange) +svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange, + struct mm_struct *mm) { - struct mm_struct *mm = prange->work_item.mm; - switch (prange->work_item.op) { case SVM_OP_NULL: pr_debug("NULL OP 0x%p prange 0x%p [0x%lx 0x%lx]\n", @@ -2004,25 +2003,29 @@ svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange) case SVM_OP_UPDATE_RANGE_NOTIFIER: pr_debug("update notifier 0x%p prange 0x%p [0x%lx 0x%lx]\n", svms, prange, prange->start, prange->last); - svm_range_update_notifier_and_interval_tree(mm, prange); + if (mm) + svm_range_update_notifier_and_interval_tree(mm, prange); break; case SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP: pr_debug("update and map 0x%p prange 0x%p [0x%lx 0x%lx]\n", svms, prange, prange->start, prange->last); - svm_range_update_notifier_and_interval_tree(mm, prange); + if (mm) + svm_range_update_notifier_and_interval_tree(mm, prange); /* TODO: implement deferred validation and mapping */ break; case SVM_OP_ADD_RANGE: pr_debug("add 0x%p prange 0x%p [0x%lx 0x%lx]\n", svms, prange, prange->start, prange->last); svm_range_add_to_svms(prange); - svm_range_add_notifier_locked(mm, prange); + if (mm) + svm_range_add_notifier_locked(mm, prange); break; case SVM_OP_ADD_RANGE_AND_MAP: pr_debug("add and map 0x%p prange 0x%p [0x%lx 0x%lx]\n", svms, prange, prange->start, prange->last); svm_range_add_to_svms(prange); - svm_range_add_notifier_locked(mm, prange); + if (mm) + svm_range_add_notifier_locked(mm, prange); /* TODO: implement deferred validation and mapping */ break; default: @@ -2071,20 +2074,22 @@ static void svm_range_deferred_list_work(struct work_struct *work) pr_debug("enter svms 0x%p\n", svms); p = container_of(svms, struct kfd_process, svms); - /* Avoid mm is gone when inserting mmu notifier */ + + /* If mm is gone, continue cleanup the deferred_range_list */ mm = get_task_mm(p->lead_thread); - if (!mm) { + if (!mm) pr_debug("svms 0x%p process mm gone\n", svms); - return; - } + retry: - mmap_write_lock(mm); + if (mm) + mmap_write_lock(mm); /* Checking for the need to drain retry faults must be inside * mmap write lock to serialize with munmap notifiers. */ if (unlikely(atomic_read(&svms->drain_pagefaults))) { - mmap_write_unlock(mm); + if (mm) + mmap_write_unlock(mm); svm_range_drain_retry_fault(svms); goto retry; } @@ -2109,19 +2114,21 @@ static void svm_range_deferred_list_work(struct work_struct *work) pr_debug("child prange 0x%p op %d\n", pchild, pchild->work_item.op); list_del_init(&pchild->child_list); - svm_range_handle_list_op(svms, pchild); + svm_range_handle_list_op(svms, pchild, mm); } mutex_unlock(&prange->migrate_mutex); - svm_range_handle_list_op(svms, prange); + svm_range_handle_list_op(svms, prange, mm); mutex_unlock(&svms->lock); spin_lock(&svms->deferred_list_lock); } spin_unlock(&svms->deferred_list_lock); - mmap_write_unlock(mm); - mmput(mm); + if (mm) { + mmap_write_unlock(mm); + mmput(mm); + } pr_debug("exit svms 0x%p\n", svms); } -- 2.17.1