[PATCH v4 4/7] amd/amdkfd: Unmap range from GPU based on granularity

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



When the MMU notifier invalidates a range, align the start and last
addresses to the range granularity before unmapping from the GPU and
updating the bitmap_map flag. Skip the unmap from the GPU if the
bitmap_map flag shows the range is already unmapped. This avoids
unmapping a single page from the GPU and flushing the TLB, and also
solves the rocgdb CWSR migration related issue.

Unmapping a range from the CPU removes the range and splits it, so the
start and last addresses cannot be aligned to the range granularity.
Change this path to split the range and the bitmap_map flag first, and
then unmap the range from the GPU. If the range were unmapped from the
GPU first, the bitmap_map flag would already be updated, and the split
could get an incorrect bitmap_map for the remaining ranges.

Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx>
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 42 +++++++++++++++++++---------
 1 file changed, 29 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index a003406db067..7a30c3e58234 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -2114,6 +2114,13 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
 	} else {
 		unsigned long s, l;
 		uint32_t trigger;
+		u64 size = 1UL << prange->granularity;
+
+		if (!svm_range_partial_mapped(prange, start, last)) {
+			pr_debug("svms 0x%p [0x%lx 0x%lx] unmapped already\n",
+				 prange->svms, start, last);
+			return 0;
+		}
 
 		if (event == MMU_NOTIFY_MIGRATE)
 			trigger = KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE;
@@ -2122,16 +2129,17 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
 
 		pr_debug("invalidate unmap svms 0x%p [0x%lx 0x%lx] from GPUs\n",
 			 prange->svms, start, last);
+
 		list_for_each_entry(pchild, &prange->child_list, child_list) {
 			mutex_lock_nested(&pchild->lock, 1);
-			s = max(start, pchild->start);
-			l = min(last, pchild->last);
+			s = svm_range_align_start(start, pchild->start, size);
+			l = svm_range_align_last(last, pchild->last, size);
 			if (l >= s)
 				svm_range_unmap_from_gpus(pchild, s, l, trigger);
 			mutex_unlock(&pchild->lock);
 		}
-		s = max(start, prange->start);
-		l = min(last, prange->last);
+		s = svm_range_align_start(start, prange->start, size);
+		l = svm_range_align_last(last, prange->last, size);
 		if (l >= s)
 			svm_range_unmap_from_gpus(prange, s, l, trigger);
 	}
@@ -2645,24 +2653,32 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
 
 	list_for_each_entry(pchild, &prange->child_list, child_list) {
 		mutex_lock_nested(&pchild->lock, 1);
-		s = max(start, pchild->start);
-		l = min(last, pchild->last);
-		if (l >= s)
-			svm_range_unmap_from_gpus(pchild, s, l, trigger);
 		svm_range_unmap_split(mm, prange, pchild, start, last);
 		mutex_unlock(&pchild->lock);
 	}
-	s = max(start, prange->start);
-	l = min(last, prange->last);
-	if (l >= s)
-		svm_range_unmap_from_gpus(prange, s, l, trigger);
 	svm_range_unmap_split(mm, prange, prange, start, last);
-
 	if (unmap_parent)
 		svm_range_add_list_work(svms, prange, mm, SVM_OP_UNMAP_RANGE);
 	else
 		svm_range_add_list_work(svms, prange, mm,
 					SVM_OP_UPDATE_RANGE_NOTIFIER);
+
+	list_for_each_entry(pchild, &prange->child_list, child_list) {
+		if (pchild->work_item.op != SVM_OP_UNMAP_RANGE)
+			continue;
+
+		s = max(start, pchild->start);
+		l = min(last, pchild->last);
+		if (l >= s)
+			svm_range_unmap_from_gpus(pchild, s, l, trigger);
+	}
+	if (prange->work_item.op == SVM_OP_UNMAP_RANGE) {
+		s = max(start, prange->start);
+		l = min(last, prange->last);
+		if (l >= s)
+			svm_range_unmap_from_gpus(prange, s, l, trigger);
+	}
+
 	schedule_deferred_list_work(svms);
 
 	kfd_unref_process(p);
-- 
2.35.1




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux