[PATCH v3] amd/amdkfd: Set correct svm range actual loc after spliting

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



While svm range partial migrating to system memory, clear dma_addr vram
domain flag, otherwise the future split will get incorrect vram_pages
and actual loc.

After range spliting, set new range and old range actual_loc:
new range actual_loc is 0 if new->vram_pages is 0.
old range actual_loc is 0 if old->vram_pages - new->vram_pages == 0.

new range takes svm_bo ref only if vram_pages not equal to 0.

Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx>
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 20 +++++++++++++++++++-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c     | 24 ++++++++++++++----------
 2 files changed, 33 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index f856901055d3..dae05f70257b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -563,18 +563,30 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
 			struct migrate_vma *migrate, struct dma_fence **mfence,
 			dma_addr_t *scratch, uint64_t npages)
 {
+	struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
 	struct device *dev = adev->dev;
+	dma_addr_t *dma_addr;
 	uint64_t *src;
 	dma_addr_t *dst;
 	struct page *dpage;
 	uint64_t i = 0, j;
 	uint64_t addr;
+	s32 gpuidx;
+	u64 offset;
 	int r = 0;
 
 	pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
 		 prange->last);
 
-	addr = prange->start << PAGE_SHIFT;
+	gpuidx = kfd_process_gpuidx_from_gpuid(p, prange->actual_loc);
+	if (gpuidx < 0) {
+		pr_debug("no GPU id 0x%x found\n", prange->actual_loc);
+		return -EINVAL;
+	}
+
+	addr = migrate->start;
+	offset = (addr >> PAGE_SHIFT) - prange->start;
+	dma_addr = prange->dma_addr[gpuidx];
 
 	src = (uint64_t *)(scratch + npages);
 	dst = scratch;
@@ -623,6 +635,12 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
 			goto out_oom;
 		}
 
+		/* Clear VRAM flag when page is migrated to ram, to count vram
+		 * pages correctly when spliting the range.
+		 */
+		if (dma_addr && (dma_addr[offset + i] & SVM_RANGE_VRAM_DOMAIN))
+			dma_addr[offset + i] = 0;
+
 		pr_debug_ratelimited("dma mapping dst to 0x%llx, pfn 0x%lx\n",
 				     dst[i] >> PAGE_SHIFT, page_to_pfn(dpage));
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index cc24f30f88fb..35ee9e648cca 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -362,7 +362,6 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
 	INIT_LIST_HEAD(&prange->child_list);
 	atomic_set(&prange->invalid, 0);
 	prange->validate_timestamp = 0;
-	prange->vram_pages = 0;
 	mutex_init(&prange->migrate_mutex);
 	mutex_init(&prange->lock);
 
@@ -980,9 +979,14 @@ svm_range_split_pages(struct svm_range *new, struct svm_range *old,
 		if (r)
 			return r;
 	}
-	if (old->actual_loc)
+	if (old->actual_loc && new->vram_pages) {
 		old->vram_pages -= new->vram_pages;
-
+		new->actual_loc = old->actual_loc;
+		if (!old->vram_pages)
+			old->actual_loc = 0;
+	}
+	pr_debug("new->vram_pages 0x%llx loc 0x%x old->vram_pages 0x%llx loc 0x%x\n",
+		 new->vram_pages, new->actual_loc, old->vram_pages, old->actual_loc);
 	return 0;
 }
 
@@ -1002,13 +1006,14 @@ svm_range_split_nodes(struct svm_range *new, struct svm_range *old,
 		new->offset = old->offset + npages;
 	}
 
-	new->svm_bo = svm_range_bo_ref(old->svm_bo);
-	new->ttm_res = old->ttm_res;
-
-	spin_lock(&new->svm_bo->list_lock);
-	list_add(&new->svm_bo_list, &new->svm_bo->range_list);
-	spin_unlock(&new->svm_bo->list_lock);
+	if (new->vram_pages) {
+		new->svm_bo = svm_range_bo_ref(old->svm_bo);
+		new->ttm_res = old->ttm_res;
 
+		spin_lock(&new->svm_bo->list_lock);
+		list_add(&new->svm_bo_list, &new->svm_bo->range_list);
+		spin_unlock(&new->svm_bo->list_lock);
+	}
 	return 0;
 }
 
@@ -1058,7 +1063,6 @@ svm_range_split_adjust(struct svm_range *new, struct svm_range *old,
 	new->flags = old->flags;
 	new->preferred_loc = old->preferred_loc;
 	new->prefetch_loc = old->prefetch_loc;
-	new->actual_loc = old->actual_loc;
 	new->granularity = old->granularity;
 	new->mapped_to_gpu = old->mapped_to_gpu;
 	bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
-- 
2.35.1




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux