This allows validation of child ranges, so the GPU page fault handler can be more light-weight. Signed-off-by: Felix Kuehling <Felix.Kuehling@xxxxxxx> --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 8 +++++ drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 40 +++++++++++++++++------- 2 files changed, 37 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 4d79d69d8aac..cc8bf6438383 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -826,12 +826,18 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf) } mutex_lock(&parent->migrate_mutex); + if (prange != parent) + mutex_lock_nested(&prange->migrate_mutex, 1); if (!prange->actual_loc) goto out_unlock_prange; svm_range_lock(parent); + if (prange != parent) + mutex_lock_nested(&prange->lock, 1); r = svm_range_split_by_granularity(p, mm, addr, parent, prange); + if (prange != parent) + mutex_unlock(&prange->lock); svm_range_unlock(parent); if (r) { pr_debug("failed %d to split range by granularity\n", r); @@ -852,6 +858,8 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf) schedule_deferred_list_work(&p->svms); out_unlock_prange: + if (prange != parent) + mutex_unlock(&prange->migrate_mutex); mutex_unlock(&parent->migrate_mutex); out: mutex_unlock(&p->svms.lock); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 0a6e28f0dcaf..bc1a9e9ba722 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1281,10 +1281,6 @@ void svm_range_unreserve_bos(struct svm_validate_context *ctx) * serialize concurrent migrations or validations of the same range, the * prange->migrate_mutex must be held. * - * The range must be in the inverval tree and have an MMU notifier to catch - * concurrent invalidations of the virtual address range. This means it cannot - * be a child range. - * * For VRAM ranges, the SVM BO must be allocated and valid (protected by its * eviction fence. * @@ -1568,10 +1564,24 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm, schedule_delayed_work(&svms->restore_work, msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS)); } else { + struct svm_range *pchild; + unsigned long s, l; + pr_debug("invalidate unmap svms 0x%p [0x%lx 0x%lx] from GPUs\n", prange->svms, start, last); svm_range_lock(prange); - svm_range_unmap_from_gpus(prange, start, last); + list_for_each_entry(pchild, &prange->child_list, child_list) { + mutex_lock_nested(&pchild->lock, 1); + s = max(start, pchild->start); + l = min(last, pchild->last); + if (l >= s) + svm_range_unmap_from_gpus(pchild, s, l); + mutex_unlock(&pchild->lock); + } + s = max(start, prange->start); + l = min(last, prange->last); + if (l >= s) + svm_range_unmap_from_gpus(prange, s, l); svm_range_unlock(prange); } @@ -1927,6 +1937,7 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange, struct svm_range_list *svms; struct svm_range *pchild; struct kfd_process *p; + unsigned long s, l; bool unmap_parent; p = kfd_lookup_process_by_mm(mm); @@ -1937,14 +1948,23 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange, pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n", svms, prange, prange->start, prange->last, start, last); - svm_range_unmap_from_gpus(prange, start, last); - svm_range_lock(prange); unmap_parent = start <= prange->start && last >= prange->last; - list_for_each_entry(pchild, &prange->child_list, child_list) + list_for_each_entry(pchild, &prange->child_list, child_list) { + mutex_lock_nested(&pchild->lock, 1); + s = max(start, pchild->start); + l = min(last, pchild->last); + if (l >= s) + svm_range_unmap_from_gpus(pchild, s, l); svm_range_unmap_split(mm, prange, pchild, start, last); + mutex_unlock(&pchild->lock); + } + s = max(start, prange->start); + l = min(last, prange->last); + if (l >= s) + svm_range_unmap_from_gpus(prange, s, l); svm_range_unmap_split(mm, prange, prange, start, last); svm_range_unlock(prange); @@ -2142,12 +2162,10 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, goto out; } - svm_range_list_lock_and_flush_work(svms, mm); + mmap_read_lock(mm); mutex_lock(&svms->lock); prange = svm_range_from_addr(svms, addr, NULL); - mmap_write_downgrade(mm); - if (!prange) { pr_debug("failed to find prange svms 0x%p address [0x%llx]\n", svms, addr); -- 2.31.0 _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/dri-devel