On 2021-11-18 at 11:19 a.m., philip yang wrote:
>
>
> On 2021-11-17 7:14 p.m., Felix Kuehling wrote:
>>
>> On 2021-11-16 10:43 p.m., Philip Yang wrote:
>>> Unmap range always sets the svms->drain_pagefaults flag, to simplify
>>> both parent range and child range unmap. Deferred list work takes the
>>> mmap write lock to read and clear svms->drain_pagefaults, to
>>> serialize with the unmap callback.
>>>
>>> Add atomic flag svms->draining_faults; if svms->draining_faults is
>>> set, the page fault handler ignores retry faults, to speed up
>>> interrupt handling.
>> Having both svms->drain_pagefaults and svms->draining_faults is
>> confusing. Do we really need both?
>
> Using one flag, I cannot find a way to handle the case of unmapping a
> new range: if the flag is set, restore_pages uses the flag to drop the
> fault, then drain_retry_fault resets the flag after draining is done,
> and we will not be able to drain retry faults for the new range.
>
I think the correct solution would be to use atomic_inc to set the flag
and atomic_cmpxchg in svm_range_drain_retry_fault to clear it. If the
flag was changed while svm_range_drain_retry_fault executed, it means
another drain was requested by someone else and the flag should remain
set for another round of draining. A rough sketch of this scheme is
appended after the quoted patch below.

Regards,
  Felix

> Regards,
>
> Philip
>
>> Regards,
>>   Felix
>>
>>>
>>> Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx>
>>> ---
>>>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  1 +
>>>  drivers/gpu/drm/amd/amdkfd/kfd_svm.c  | 24 ++++++++++++++++++------
>>>  2 files changed, 19 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>> index 1d3f012bcd2a..4e4640b731e1 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>> @@ -767,6 +767,7 @@ struct svm_range_list {
>>>      spinlock_t deferred_list_lock;
>>>      atomic_t evicted_ranges;
>>>      bool drain_pagefaults;
>>> +    atomic_t draining_faults;
>>>      struct delayed_work restore_work;
>>>      DECLARE_BITMAP(bitmap_supported, MAX_GPU_INSTANCE);
>>>      struct task_struct *faulting_task;
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>> index 3eb0a9491755..d332462bf9d3 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>> @@ -1962,6 +1962,7 @@ void svm_range_drain_retry_fault(struct svm_range_list *svms)
>>>      p = container_of(svms, struct kfd_process, svms);
>>> +    atomic_set(&svms->draining_faults, 1);
>>>      for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) {
>>>          pdd = p->pdds[i];
>>>          if (!pdd)
>>> @@ -1975,6 +1976,7 @@ void svm_range_drain_retry_fault(struct svm_range_list *svms)
>>>          flush_work(&adev->irq.ih1_work);
>>>          pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms);
>>>      }
>>> +    atomic_set(&svms->draining_faults, 0);
>>>  }
>>>
>>>  static void svm_range_deferred_list_work(struct work_struct *work)
>>> @@ -2002,6 +2004,7 @@ static void svm_range_deferred_list_work(struct work_struct *work)
>>>       * mmap write lock to serialize with munmap notifiers.
>>>       */
>>>      if (unlikely(READ_ONCE(svms->drain_pagefaults))) {
>>> +        atomic_set(&svms->draining_faults, 1);
>>>          WRITE_ONCE(svms->drain_pagefaults, false);
>>>          mmap_write_unlock(mm);
>>>          svm_range_drain_retry_fault(svms);
>>> @@ -2049,12 +2052,6 @@ svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange,
>>>                  struct mm_struct *mm, enum svm_work_list_ops op)
>>>  {
>>>      spin_lock(&svms->deferred_list_lock);
>>> -    /* Make sure pending page faults are drained in the deferred worker
>>> -     * before the range is freed to avoid straggler interrupts on
>>> -     * unmapped memory causing "phantom faults".
>>> -     */
>>> -    if (op == SVM_OP_UNMAP_RANGE)
>>> -        svms->drain_pagefaults = true;
>>>      /* if prange is on the deferred list */
>>>      if (!list_empty(&prange->deferred_list)) {
>>>          pr_debug("update exist prange 0x%p work op %d\n", prange, op);
>>> @@ -2133,6 +2130,13 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
>>>      pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n", svms,
>>>           prange, prange->start, prange->last, start, last);
>>> +    /* Make sure pending page faults are drained in the deferred worker
>>> +     * before the range is freed to avoid straggler interrupts on
>>> +     * unmapped memory causing "phantom faults".
>>> +     */
>>> +    pr_debug("set range drain_pagefaults true\n");
>>> +    WRITE_ONCE(svms->drain_pagefaults, true);
>>> +
>>>      unmap_parent = start <= prange->start && last >= prange->last;
>>>
>>>      list_for_each_entry(pchild, &prange->child_list, child_list) {
>>> @@ -2595,6 +2599,13 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
>>>      mm = p->mm;
>>>      mmap_write_lock(mm);
>>> +    if (!!atomic_read(&svms->draining_faults) ||
>>> +        READ_ONCE(svms->drain_pagefaults)) {
>>> +        pr_debug("draining retry fault, drop fault 0x%llx\n", addr);
>>> +        mmap_write_downgrade(mm);
>>> +        goto out_unlock_mm;
>>> +    }
>>> +
>>>      vma = find_vma(p->mm, addr << PAGE_SHIFT);
>>>      if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) {
>>>          pr_debug("VMA not found for address 0x%llx\n", addr);
>>> @@ -2732,6 +2743,7 @@ int svm_range_list_init(struct kfd_process *p)
>>>      mutex_init(&svms->lock);
>>>      INIT_LIST_HEAD(&svms->list);
>>>      atomic_set(&svms->evicted_ranges, 0);
>>> +    atomic_set(&svms->draining_faults, 0);
>>>      INIT_DELAYED_WORK(&svms->restore_work, svm_range_restore_work);
>>>      INIT_WORK(&svms->deferred_list_work, svm_range_deferred_list_work);
>>>      INIT_LIST_HEAD(&svms->deferred_range_list);
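
P.S. For reference, a rough sketch of the single-counter scheme described
above, assuming a single atomic_t (reusing the name svms->drain_pagefaults)
replaces both flags. This is illustrative only, not the actual patch;
locking and the per-GPU flush loop are elided:

/* Unmap path: request a drain. Any non-zero value means "drain needed",
 * and a request arriving during an ongoing drain changes the value.
 */
atomic_inc(&svms->drain_pagefaults);

/* Fault handler: drop retry faults while any drain is pending. */
if (atomic_read(&svms->drain_pagefaults)) {
        pr_debug("draining retry fault, drop fault 0x%llx\n", addr);
        /* drop the fault and return */
}

/* Drain function: clear the counter only if no new drain request
 * arrived while we were flushing; otherwise drain another round.
 */
void svm_range_drain_retry_fault(struct svm_range_list *svms)
{
        int drain;

restart:
        drain = atomic_read(&svms->drain_pagefaults);
        if (!drain)
                return;

        /* ... flush the IH rings of all GPUs, as in the patch above ... */

        if (atomic_cmpxchg(&svms->drain_pagefaults, drain, 0) != drain)
                goto restart;
}

Because a request that arrives mid-drain keeps the counter non-zero, the
fault handler keeps dropping faults until the last round of draining has
completed, which removes the need for the separate draining_faults flag.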