Can we instead take a proper reference to the mm in
svm_range_add_list_work? That way the mm would remain valid as long as
the work is scheduled. So instead of calling get_task_mm in
svm_range_deferred_list_work, do it in svm_range_add_list_work.

Regards,
  Felix


On 2022-01-19 11:22 a.m., Philip Yang wrote:
> After the mm is removed from task->mm, the deferred_list work should
> continue to handle the deferred_range_list, which may have been split
> into child ranges, to avoid leaking child ranges; it should also remove
> the ranges' mmu interval notifiers to avoid leaking mm_count references
> on the mm, but skip updating notifiers and inserting new ones.
>
> Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx>
> Reported-by: Ruili Ji <ruili.ji@xxxxxxx>
> Tested-by: Ruili Ji <ruili.ji@xxxxxxx>
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 41 ++++++++++++++++------------
>  1 file changed, 24 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index f2805ba74c80..9ec195e1ef23 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -1985,10 +1985,9 @@ svm_range_update_notifier_and_interval_tree(struct mm_struct *mm,
>  }
>
>  static void
> -svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange)
> +svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange,
> +			 struct mm_struct *mm)
>  {
> -	struct mm_struct *mm = prange->work_item.mm;
> -
>  	switch (prange->work_item.op) {
>  	case SVM_OP_NULL:
>  		pr_debug("NULL OP 0x%p prange 0x%p [0x%lx 0x%lx]\n",
> @@ -2004,25 +2003,29 @@ svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange)
>  	case SVM_OP_UPDATE_RANGE_NOTIFIER:
>  		pr_debug("update notifier 0x%p prange 0x%p [0x%lx 0x%lx]\n",
>  			 svms, prange, prange->start, prange->last);
> -		svm_range_update_notifier_and_interval_tree(mm, prange);
> +		if (mm)
> +			svm_range_update_notifier_and_interval_tree(mm, prange);
>  		break;
>  	case SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP:
>  		pr_debug("update and map 0x%p prange 0x%p [0x%lx 0x%lx]\n",
>  			 svms, prange, prange->start, prange->last);
> -		svm_range_update_notifier_and_interval_tree(mm, prange);
> +		if (mm)
> +			svm_range_update_notifier_and_interval_tree(mm, prange);
>  		/* TODO: implement deferred validation and mapping */
>  		break;
>  	case SVM_OP_ADD_RANGE:
>  		pr_debug("add 0x%p prange 0x%p [0x%lx 0x%lx]\n", svms, prange,
>  			 prange->start, prange->last);
>  		svm_range_add_to_svms(prange);
> -		svm_range_add_notifier_locked(mm, prange);
> +		if (mm)
> +			svm_range_add_notifier_locked(mm, prange);
>  		break;
>  	case SVM_OP_ADD_RANGE_AND_MAP:
>  		pr_debug("add and map 0x%p prange 0x%p [0x%lx 0x%lx]\n", svms,
>  			 prange, prange->start, prange->last);
>  		svm_range_add_to_svms(prange);
> -		svm_range_add_notifier_locked(mm, prange);
> +		if (mm)
> +			svm_range_add_notifier_locked(mm, prange);
>  		/* TODO: implement deferred validation and mapping */
>  		break;
>  	default:
> @@ -2071,20 +2074,22 @@ static void svm_range_deferred_list_work(struct work_struct *work)
>  	pr_debug("enter svms 0x%p\n", svms);
>
>  	p = container_of(svms, struct kfd_process, svms);
> -	/* Avoid mm is gone when inserting mmu notifier */
> +
> +	/* If mm is gone, continue cleanup the deferred_range_list */
>  	mm = get_task_mm(p->lead_thread);
> -	if (!mm) {
> +	if (!mm)
>  		pr_debug("svms 0x%p process mm gone\n", svms);
> -		return;
> -	}
> +
>  retry:
> -	mmap_write_lock(mm);
> +	if (mm)
> +		mmap_write_lock(mm);
>
>  	/* Checking for the need to drain retry faults must be inside
>  	 * mmap write lock to serialize with munmap notifiers.
>  	 */
>  	if (unlikely(atomic_read(&svms->drain_pagefaults))) {
> -		mmap_write_unlock(mm);
> +		if (mm)
> +			mmap_write_unlock(mm);
>  		svm_range_drain_retry_fault(svms);
>  		goto retry;
>  	}
> @@ -2109,19 +2114,21 @@ static void svm_range_deferred_list_work(struct work_struct *work)
>  			pr_debug("child prange 0x%p op %d\n", pchild,
>  				 pchild->work_item.op);
>  			list_del_init(&pchild->child_list);
> -			svm_range_handle_list_op(svms, pchild);
> +			svm_range_handle_list_op(svms, pchild, mm);
>  		}
>  		mutex_unlock(&prange->migrate_mutex);
>
> -		svm_range_handle_list_op(svms, prange);
> +		svm_range_handle_list_op(svms, prange, mm);
>  		mutex_unlock(&svms->lock);
>
>  		spin_lock(&svms->deferred_list_lock);
>  	}
>  	spin_unlock(&svms->deferred_list_lock);
>
> -	mmap_write_unlock(mm);
> -	mmput(mm);
> +	if (mm) {
> +		mmap_write_unlock(mm);
> +		mmput(mm);
> +	}
>  	pr_debug("exit svms 0x%p\n", svms);
>  }
>
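
For reference, a minimal sketch of the approach Felix suggests above:
pin the mm with mmget() when the work item is queued in
svm_range_add_list_work, then drop the reference with mmput() once the
deferred worker is done with it. Function and field names follow this
thread; the locking details and the handling of already-queued work
items are simplified, so treat this as a sketch of the idea rather
than a finished patch.

/* Sketch: pin the mm when the deferred work item is queued so it
 * stays valid until the worker runs (simplified; assumes prange is
 * not already on the deferred list).
 */
static void
svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange,
			struct mm_struct *mm, enum svm_work_list_ops op)
{
	spin_lock(&svms->deferred_list_lock);
	if (list_empty(&prange->deferred_list)) {
		prange->work_item.op = op;
		mmget(mm);		/* hold mm_users for the work item */
		prange->work_item.mm = mm;
		list_add_tail(&prange->deferred_list,
			      &svms->deferred_range_list);
	}
	spin_unlock(&svms->deferred_list_lock);
	schedule_work(&svms->deferred_list_work);
}

/* In svm_range_deferred_list_work(), use the pinned reference instead
 * of get_task_mm(), and drop it once the range has been handled:
 */
	mm = prange->work_item.mm;
	mmap_write_lock(mm);
	/* ... handle prange and its child ranges ... */
	mmap_write_unlock(mm);
	mmput(mm);		/* drop the reference taken at queue time */

With the reference taken at queue time, the worker no longer depends on
task->mm still pointing at the mm, which would sidestep the if (mm)
checks added throughout this patch.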