When the MMU notifier invalidates a range, align the start and last
address to the range granularity before unmapping from the GPU and
updating the bitmap_map flag. Skip the GPU unmap if the range is
already unmapped according to the bitmap_map flag. This avoids
unmapping a single page from the GPU and flushing the TLB, and also
fixes the rocgdb CWSR migration related issue.

Unmapping the range from the CPU removes or splits the range, so the
start and last address cannot be aligned to the range granularity
there. Change the order to split the range and its bitmap_map flag
first, then unmap the range from the GPU. If the GPU unmap ran first,
it would update the bitmap_map flag and the split could compute an
incorrect bitmap_map for the remaining ranges.

Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx>
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 42 +++++++++++++++++++---------
 1 file changed, 29 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index a003406db067..7a30c3e58234 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -2114,6 +2114,13 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
 	} else {
 		unsigned long s, l;
 		uint32_t trigger;
+		u64 size = 1UL << prange->granularity;
+
+		if (!svm_range_partial_mapped(prange, start, last)) {
+			pr_debug("svms 0x%p [0x%lx 0x%lx] unmapped already\n",
+				 prange->svms, start, last);
+			return 0;
+		}
 
 		if (event == MMU_NOTIFY_MIGRATE)
 			trigger = KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE;
@@ -2122,16 +2129,17 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
 		pr_debug("invalidate unmap svms 0x%p [0x%lx 0x%lx] from GPUs\n",
 			 prange->svms, start, last);
+
 		list_for_each_entry(pchild, &prange->child_list, child_list) {
 			mutex_lock_nested(&pchild->lock, 1);
-			s = max(start, pchild->start);
-			l = min(last, pchild->last);
+			s = svm_range_align_start(start, pchild->start, size);
+			l = svm_range_align_last(last, pchild->last, size);
 			if (l >= s)
 				svm_range_unmap_from_gpus(pchild, s, l, trigger);
 			mutex_unlock(&pchild->lock);
 		}
-		s = max(start, prange->start);
-		l = min(last, prange->last);
+		s = svm_range_align_start(start, prange->start, size);
+		l = svm_range_align_last(last, prange->last, size);
 		if (l >= s)
 			svm_range_unmap_from_gpus(prange, s, l, trigger);
 	}
@@ -2645,24 +2653,32 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
 	list_for_each_entry(pchild, &prange->child_list, child_list) {
 		mutex_lock_nested(&pchild->lock, 1);
-		s = max(start, pchild->start);
-		l = min(last, pchild->last);
-		if (l >= s)
-			svm_range_unmap_from_gpus(pchild, s, l, trigger);
 		svm_range_unmap_split(mm, prange, pchild, start, last);
 		mutex_unlock(&pchild->lock);
 	}
-	s = max(start, prange->start);
-	l = min(last, prange->last);
-	if (l >= s)
-		svm_range_unmap_from_gpus(prange, s, l, trigger);
 	svm_range_unmap_split(mm, prange, prange, start, last);
-
 	if (unmap_parent)
 		svm_range_add_list_work(svms, prange, mm, SVM_OP_UNMAP_RANGE);
 	else
 		svm_range_add_list_work(svms, prange, mm,
 					SVM_OP_UPDATE_RANGE_NOTIFIER);
+
+	list_for_each_entry(pchild, &prange->child_list, child_list) {
+		if (pchild->work_item.op != SVM_OP_UNMAP_RANGE)
+			continue;
+
+		s = max(start, pchild->start);
+		l = min(last, pchild->last);
+		if (l >= s)
+			svm_range_unmap_from_gpus(pchild, s, l, trigger);
+	}
+	if (prange->work_item.op == SVM_OP_UNMAP_RANGE) {
+		s = max(start, prange->start);
+		l = min(last, prange->last);
+		if (l >= s)
+			svm_range_unmap_from_gpus(prange, s, l, trigger);
+	}
+
 	schedule_deferred_list_work(svms);
 	kfd_unref_process(p);
-- 
2.35.1
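
Note: the helpers svm_range_align_start(), svm_range_align_last() and
svm_range_partial_mapped() used above are not defined in this diff.
Below is a minimal sketch of what the alignment helpers are assumed to
do, namely expand the invalidated [start, last] interval out to whole
granularity-sized granules and clamp the result to the prange
boundaries. The names are taken from the calls above; the bodies are
illustrative only and are not the actual kfd_svm.c implementation.

#include <linux/align.h>	/* ALIGN, ALIGN_DOWN */
#include <linux/minmax.h>	/* min, max */

/*
 * Illustrative sketch only (assumed semantics): expand [start, last]
 * to whole granules of "size" pages, clamped to the parent range, so
 * whole granules are unmapped from the GPU at once.
 */
static unsigned long
svm_range_align_start(unsigned long start, unsigned long range_start, u64 size)
{
	unsigned long s = max(start, range_start);

	/* round down to the first page of the granule containing s */
	s = ALIGN_DOWN(s, size);
	/* but never before the first page of the range itself */
	return max(s, range_start);
}

static unsigned long
svm_range_align_last(unsigned long last, unsigned long range_last, u64 size)
{
	unsigned long l = min(last, range_last);

	/* round up to the last page of the granule containing l */
	l = ALIGN(l + 1, size) - 1;
	/* but never past the last page of the range itself */
	return min(l, range_last);
}

Unmapping whole granules this way keeps the per-granule bitmap_map
tracking consistent with what is actually mapped on the GPU, which is
what lets the svm_range_partial_mapped() check in svm_range_evict()
skip ranges that are already unmapped.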