To be able to handle multiple GPU page faults concurrently:

1. Remove the mmap write lock; take and release the mmap read lock when
   checking the vma, migrating and calling hmm_range_fault.
2. Insert the new range's mmu interval notifier without holding the
   mmap lock.
3. Take and release the svms lock only while checking for and inserting
   a new range into svms.

The resulting lock ordering is illustrated in a short userspace sketch
appended after the patch.

Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx>
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 70 ++++++++++++++--------------
 1 file changed, 36 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 3b78e48832e9..cc24f30f88fb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -117,6 +117,18 @@ svm_range_add_notifier_locked(struct mm_struct *mm, struct svm_range *prange)
 				     &svm_range_mn_ops);
 }
 
+static void
+svm_range_add_notifier(struct mm_struct *mm, struct svm_range *prange)
+{
+	pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms,
+		 prange, prange->start, prange->last);
+
+	mmu_interval_notifier_insert(&prange->notifier, mm,
+				     prange->start << PAGE_SHIFT,
+				     prange->npages << PAGE_SHIFT,
+				     &svm_range_mn_ops);
+}
+
 /**
  * svm_range_add_to_svms - add svm range to svms
  * @prange: svm range structure to be added
@@ -2801,9 +2813,6 @@ svm_range *svm_range_create_unregistered_range(struct kfd_node *node,
 	if (is_heap_stack)
 		prange->preferred_loc = KFD_IOCTL_SVM_LOCATION_SYSMEM;
 
-	svm_range_add_to_svms(prange);
-	svm_range_add_notifier_locked(mm, prange);
-
 	return prange;
 }
 
@@ -2899,9 +2908,9 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 	struct kfd_node *node;
 	int32_t best_loc;
 	int32_t gpuidx = MAX_GPU_INSTANCE;
-	bool write_locked = false;
 	struct vm_area_struct *vma;
 	bool migration = false;
+	bool new_prange = false;
 	int r = 0;
 
 	if (!KFD_IS_SVM_API_SUPPORTED(adev)) {
@@ -2947,42 +2956,30 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 		r = -EFAULT;
 		goto out;
 	}
+
 	mmap_read_lock(mm);
-retry_write_locked:
 	mutex_lock(&svms->lock);
 	prange = svm_range_from_addr(svms, addr, NULL);
 	if (!prange) {
 		pr_debug("failed to find prange svms 0x%p address [0x%llx]\n",
 			 svms, addr);
-		if (!write_locked) {
-			/* Need the write lock to create new range with MMU notifier.
-			 * Also flush pending deferred work to make sure the interval
-			 * tree is up to date before we add a new range
-			 */
-			mutex_unlock(&svms->lock);
-			mmap_read_unlock(mm);
-			mmap_write_lock(mm);
-			write_locked = true;
-			goto retry_write_locked;
-		}
 		prange = svm_range_create_unregistered_range(node, p, mm, addr);
 		if (!prange) {
 			pr_debug("failed to create unregistered range svms 0x%p address [0x%llx]\n",
 				 svms, addr);
-			mmap_write_downgrade(mm);
 			r = -EFAULT;
-			goto out_unlock_svms;
+			mutex_unlock(&svms->lock);
+			goto out_unlock_mm;
 		}
+		svm_range_add_to_svms(prange);
+		new_prange = true;
 	}
-	if (write_locked)
-		mmap_write_downgrade(mm);
-
-	mutex_lock(&prange->migrate_mutex);
+	mutex_unlock(&svms->lock);
 
 	if (svm_range_skip_recover(prange)) {
 		amdgpu_gmc_filter_faults_remove(node->adev, addr, pasid);
 		r = 0;
-		goto out_unlock_range;
+		goto out_unlock_mm;
 	}
 
 	/* skip duplicate vm fault on different pages of same range */
@@ -2991,7 +2988,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 		pr_debug("svms 0x%p [0x%lx %lx] already restored\n",
 			 svms, prange->start, prange->last);
 		r = 0;
-		goto out_unlock_range;
+		goto out_unlock_mm;
 	}
 
 	/* __do_munmap removed VMA, return success as we are handling stale
@@ -3001,14 +2998,14 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 	if (!vma) {
 		pr_debug("address 0x%llx VMA is removed\n", addr);
 		r = 0;
-		goto out_unlock_range;
+		goto out_unlock_mm;
 	}
 
 	if (!svm_fault_allowed(vma, write_fault)) {
 		pr_debug("fault addr 0x%llx no %s permission\n", addr,
 			write_fault ? "write" : "read");
 		r = -EPERM;
-		goto out_unlock_range;
+		goto out_unlock_mm;
 	}
 
 	best_loc = svm_range_best_restore_location(prange, node, &gpuidx);
@@ -3016,9 +3013,11 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 		pr_debug("svms %p failed get best restore loc [0x%lx 0x%lx]\n",
 			 svms, prange->start, prange->last);
 		r = -EACCES;
-		goto out_unlock_range;
+		goto out_unlock_mm;
 	}
 
+	mutex_lock(&prange->migrate_mutex);
+
 	pr_debug("svms %p [0x%lx 0x%lx] best restore 0x%x, actual loc 0x%x\n",
 		 svms, prange->start, prange->last, best_loc,
 		 prange->actual_loc);
@@ -3055,9 +3054,17 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 		if (r) {
 			pr_debug("failed %d to migrate svms %p [0x%lx 0x%lx]\n",
 				 r, svms, start, last);
-			goto out_unlock_range;
+			mutex_unlock(&prange->migrate_mutex);
+			goto out_unlock_mm;
 		}
 	}
+	mutex_unlock(&prange->migrate_mutex);
+	mmap_read_unlock(mm);
+
+	if (new_prange)
+		svm_range_add_notifier(mm, prange);
+
+	mmap_read_lock(mm);
 
 	r = svm_range_validate_and_map(mm, start, last, prange, gpuidx, false,
 				       false, false);
@@ -3068,14 +3075,9 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 	kfd_smi_event_page_fault_end(node, p->lead_thread->pid, addr,
 				     migration);
 
-out_unlock_range:
-	mutex_unlock(&prange->migrate_mutex);
-out_unlock_svms:
-	mutex_unlock(&svms->lock);
+out_unlock_mm:
 	mmap_read_unlock(mm);
-
 	svm_range_count_fault(node, p, gpuidx);
-
 	mmput(mm);
 out:
 	kfd_unref_process(p);
--
2.35.1
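
A minimal userspace sketch of the lock ordering this patch establishes in
svm_range_restore_pages(). It is illustrative only, not kernel code:
pthread_rwlock_t stands in for the mmap lock, pthread_mutex_t for
svms->lock, and lookup_or_create_range() is a hypothetical stand-in for
svm_range_from_addr()/svm_range_create_unregistered_range(). The numbered
comments refer to the three points in the commit message.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_rwlock_t mmap_lock = PTHREAD_RWLOCK_INITIALIZER;	/* mmap lock */
static pthread_mutex_t svms_lock = PTHREAD_MUTEX_INITIALIZER;	/* svms->lock */

struct range {
	bool registered;
};

/* Hypothetical stand-in for svm_range_from_addr() plus
 * svm_range_create_unregistered_range(): return the range covering addr,
 * creating it on the first fault.
 */
static struct range *lookup_or_create_range(unsigned long addr, bool *new_prange)
{
	static struct range r;	/* a one-range "interval tree" */

	(void)addr;
	*new_prange = !r.registered;
	r.registered = true;
	return &r;
}

static void handle_fault(unsigned long addr)
{
	struct range *prange;
	bool new_prange;

	/* (1) The mmap lock is only ever taken for read, so concurrent
	 * faults no longer serialize on a write lock.
	 */
	pthread_rwlock_rdlock(&mmap_lock);

	/* (3) The svms lock is held just long enough to look up, or create
	 * and insert, the faulting range.
	 */
	pthread_mutex_lock(&svms_lock);
	prange = lookup_or_create_range(addr, &new_prange);
	pthread_mutex_unlock(&svms_lock);

	/* ... vma checks, migration and hmm_range_fault() happen here,
	 * under the read lock ...
	 */

	/* (2) Drop the mmap lock before inserting the interval notifier:
	 * unlike the _locked variant used on the old write-locked path,
	 * mmu_interval_notifier_insert() may take the mmap lock itself.
	 */
	pthread_rwlock_unlock(&mmap_lock);
	if (new_prange)
		printf("insert interval notifier for new range %p\n",
		       (void *)prange);

	/* Retake the read lock for validate-and-map, then drop it on the
	 * way out (the out_unlock_mm path in the patch).
	 */
	pthread_rwlock_rdlock(&mmap_lock);
	/* ... svm_range_validate_and_map() equivalent ... */
	pthread_rwlock_unlock(&mmap_lock);
}

int main(void)
{
	handle_fault(0x1000);
	handle_fault(0x1000);	/* second fault reuses the existing range */
	return 0;
}

The patch follows the same shape: svms->lock now protects only the
interval-tree lookup/insert, prange->migrate_mutex only the migration, and
the notifier insert for a brand-new range runs with no locks held.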