While svm range partial migrating to system memory, clear dma_addr vram domain flag, otherwise the future split will get incorrect vram_pages and actual loc. After range spliting, set new range and old range actual_loc: new range actual_loc is 0 if new->vram_pages is 0. old range actual_loc is 0 if old->vram_pages - new->vram_pages == 0. new range takes svm_bo ref only if vram_pages not equal to 0. Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx> --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 20 +++++++++++++++++++- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 24 ++++++++++++++---------- 2 files changed, 33 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index f856901055d3..dae05f70257b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -563,18 +563,30 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, struct migrate_vma *migrate, struct dma_fence **mfence, dma_addr_t *scratch, uint64_t npages) { + struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms); struct device *dev = adev->dev; + dma_addr_t *dma_addr; uint64_t *src; dma_addr_t *dst; struct page *dpage; uint64_t i = 0, j; uint64_t addr; + s32 gpuidx; + u64 offset; int r = 0; pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start, prange->last); - addr = prange->start << PAGE_SHIFT; + gpuidx = kfd_process_gpuidx_from_gpuid(p, prange->actual_loc); + if (gpuidx < 0) { + pr_debug("no GPU id 0x%x found\n", prange->actual_loc); + return -EINVAL; + } + + addr = migrate->start; + offset = (addr >> PAGE_SHIFT) - prange->start; + dma_addr = prange->dma_addr[gpuidx]; src = (uint64_t *)(scratch + npages); dst = scratch; @@ -623,6 +635,12 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, goto out_oom; } + /* Clear VRAM flag when page is migrated to ram, to count vram + * pages correctly when spliting the range. + */ + if (dma_addr && (dma_addr[offset + i] & SVM_RANGE_VRAM_DOMAIN)) + dma_addr[offset + i] = 0; + pr_debug_ratelimited("dma mapping dst to 0x%llx, pfn 0x%lx\n", dst[i] >> PAGE_SHIFT, page_to_pfn(dpage)); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index cc24f30f88fb..35ee9e648cca 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -362,7 +362,6 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start, INIT_LIST_HEAD(&prange->child_list); atomic_set(&prange->invalid, 0); prange->validate_timestamp = 0; - prange->vram_pages = 0; mutex_init(&prange->migrate_mutex); mutex_init(&prange->lock); @@ -980,9 +979,14 @@ svm_range_split_pages(struct svm_range *new, struct svm_range *old, if (r) return r; } - if (old->actual_loc) + if (old->actual_loc && new->vram_pages) { old->vram_pages -= new->vram_pages; - + new->actual_loc = old->actual_loc; + if (!old->vram_pages) + old->actual_loc = 0; + } + pr_debug("new->vram_pages 0x%llx loc 0x%x old->vram_pages 0x%llx loc 0x%x\n", + new->vram_pages, new->actual_loc, old->vram_pages, old->actual_loc); return 0; } @@ -1002,13 +1006,14 @@ svm_range_split_nodes(struct svm_range *new, struct svm_range *old, new->offset = old->offset + npages; } - new->svm_bo = svm_range_bo_ref(old->svm_bo); - new->ttm_res = old->ttm_res; - - spin_lock(&new->svm_bo->list_lock); - list_add(&new->svm_bo_list, &new->svm_bo->range_list); - spin_unlock(&new->svm_bo->list_lock); + if (new->vram_pages) { + new->svm_bo = svm_range_bo_ref(old->svm_bo); + new->ttm_res = old->ttm_res; + spin_lock(&new->svm_bo->list_lock); + list_add(&new->svm_bo_list, &new->svm_bo->range_list); + spin_unlock(&new->svm_bo->list_lock); + } return 0; } @@ -1058,7 +1063,6 @@ svm_range_split_adjust(struct svm_range *new, struct svm_range *old, new->flags = old->flags; new->preferred_loc = old->preferred_loc; new->prefetch_loc = old->prefetch_loc; - new->actual_loc = old->actual_loc; new->granularity = old->granularity; new->mapped_to_gpu = old->mapped_to_gpu; bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE); -- 2.35.1