After partially migrating an svm range to system memory, dma unmap the
migrated pages to clear the corresponding dma_addr VRAM domain flag,
otherwise a future range split will compute incorrect vram_pages and
actual_loc.

After a range split, set the new and old range actual_loc:
new range actual_loc is 0 if new->vram_pages is 0.
old range actual_loc is 0 if old->vram_pages - new->vram_pages == 0.

The new range takes an svm_bo reference only if its vram_pages is not 0.

Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx>
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c |  3 ++
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c     | 35 +++++++++++++++---------
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h     |  3 +-
 3 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index f856901055d3..e85bcda29db6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -839,6 +839,9 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
 			prange->actual_loc = 0;
 			svm_range_vram_node_free(prange);
 		}
+
+		svm_range_dma_unmap(prange, start_mgr - prange->start,
+				    last_mgr - start_mgr + 1);
 	}
 
 	return r < 0 ? r : 0;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index cc24f30f88fb..2202bdcde057 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -254,6 +254,10 @@ void svm_range_dma_unmap_dev(struct device *dev, dma_addr_t *dma_addr,
 		return;
 
 	for (i = offset; i < offset + npages; i++) {
+		if (dma_addr[i] & SVM_RANGE_VRAM_DOMAIN) {
+			dma_addr[i] = 0;
+			continue;
+		}
 		if (!svm_is_valid_dma_mapping_addr(dev, dma_addr[i]))
 			continue;
 		pr_debug_ratelimited("unmap 0x%llx\n", dma_addr[i] >> PAGE_SHIFT);
@@ -262,7 +266,8 @@ void svm_range_dma_unmap_dev(struct device *dev, dma_addr_t *dma_addr,
 	}
 }
 
-void svm_range_dma_unmap(struct svm_range *prange)
+void svm_range_dma_unmap(struct svm_range *prange, unsigned long offset,
+			 unsigned long npages)
 {
 	struct kfd_process_device *pdd;
 	dma_addr_t *dma_addr;
@@ -284,7 +289,7 @@ void svm_range_dma_unmap(struct svm_range *prange)
 		}
 
 		dev = &pdd->dev->adev->pdev->dev;
-		svm_range_dma_unmap_dev(dev, dma_addr, 0, prange->npages);
+		svm_range_dma_unmap_dev(dev, dma_addr, offset, npages);
 	}
 }
 
@@ -299,7 +304,7 @@ static void svm_range_free(struct svm_range *prange, bool do_unmap)
 
 	svm_range_vram_node_free(prange);
 	if (do_unmap)
-		svm_range_dma_unmap(prange);
+		svm_range_dma_unmap(prange, 0, prange->npages);
 
 	if (do_unmap && !p->xnack_enabled) {
 		pr_debug("unreserve prange 0x%p size: 0x%llx\n", prange, size);
@@ -362,7 +367,6 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
 	INIT_LIST_HEAD(&prange->child_list);
 	atomic_set(&prange->invalid, 0);
 	prange->validate_timestamp = 0;
-	prange->vram_pages = 0;
 	mutex_init(&prange->migrate_mutex);
 	mutex_init(&prange->lock);
 
@@ -980,9 +984,14 @@ svm_range_split_pages(struct svm_range *new, struct svm_range *old,
 		if (r)
 			return r;
 	}
-	if (old->actual_loc)
+	if (old->actual_loc && new->vram_pages) {
 		old->vram_pages -= new->vram_pages;
-
+		new->actual_loc = old->actual_loc;
+		if (!old->vram_pages)
+			old->actual_loc = 0;
+	}
+	pr_debug("new->vram_pages 0x%llx loc 0x%x old->vram_pages 0x%llx loc 0x%x\n",
+		 new->vram_pages, new->actual_loc, old->vram_pages, old->actual_loc);
 	return 0;
 }
 
@@ -1002,13 +1011,14 @@ svm_range_split_nodes(struct svm_range *new, struct svm_range *old,
 		new->offset = old->offset + npages;
 	}
 
-	new->svm_bo = svm_range_bo_ref(old->svm_bo);
-	new->ttm_res = old->ttm_res;
-
-	spin_lock(&new->svm_bo->list_lock);
-	list_add(&new->svm_bo_list, &new->svm_bo->range_list);
-	spin_unlock(&new->svm_bo->list_lock);
+	if (new->vram_pages) {
+		new->svm_bo = svm_range_bo_ref(old->svm_bo);
+		new->ttm_res = old->ttm_res;
+		spin_lock(&new->svm_bo->list_lock);
+		list_add(&new->svm_bo_list, &new->svm_bo->range_list);
+		spin_unlock(&new->svm_bo->list_lock);
+	}
 
 	return 0;
 }
@@ -1058,7 +1068,6 @@ svm_range_split_adjust(struct svm_range *new, struct svm_range *old,
 	new->flags = old->flags;
 	new->preferred_loc = old->preferred_loc;
 	new->prefetch_loc = old->prefetch_loc;
-	new->actual_loc = old->actual_loc;
 	new->granularity = old->granularity;
 	new->mapped_to_gpu = old->mapped_to_gpu;
 	bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index 026863a0abcd..778f108911cd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -182,7 +182,8 @@ void svm_range_add_list_work(struct svm_range_list *svms,
 void schedule_deferred_list_work(struct svm_range_list *svms);
 void svm_range_dma_unmap_dev(struct device *dev, dma_addr_t *dma_addr,
 			     unsigned long offset, unsigned long npages);
-void svm_range_dma_unmap(struct svm_range *prange);
+void svm_range_dma_unmap(struct svm_range *prange, unsigned long offset,
+			 unsigned long npages);
 int svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
 		       uint64_t *svm_priv_data_size);
 int kfd_criu_checkpoint_svm(struct kfd_process *p,
-- 
2.35.1
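
For context, the actual_loc/vram_pages rule from the commit message can be read
as the minimal user-space sketch below. struct toy_range, toy_split_account()
and the values in main() are hypothetical stand-ins for struct svm_range and
svm_range_split_pages(); the real kernel path also moves the dma_addr arrays
and the svm_bo reference, which this sketch leaves out.

#include <inttypes.h>
#include <stdio.h>

/* Hypothetical, simplified stand-in for struct svm_range. */
struct toy_range {
	uint64_t vram_pages;	/* pages of this range resident in VRAM */
	uint32_t actual_loc;	/* 0 = system memory, otherwise GPU id */
};

/* Accounting when 'new_r' is split off from 'old_r', mirroring the rule above. */
static void toy_split_account(struct toy_range *new_r, struct toy_range *old_r)
{
	if (old_r->actual_loc && new_r->vram_pages) {
		old_r->vram_pages -= new_r->vram_pages;
		/* new range keeps the VRAM location only if it owns VRAM pages */
		new_r->actual_loc = old_r->actual_loc;
		/* old range has no VRAM pages left: it is pure system memory now */
		if (!old_r->vram_pages)
			old_r->actual_loc = 0;
	}
	/* if new_r->vram_pages == 0, new_r->actual_loc stays 0 (system memory) */
}

int main(void)
{
	/* old range: 8 pages in VRAM on GPU 1; the split hands all 8 to new_r */
	struct toy_range old_r = { .vram_pages = 8, .actual_loc = 1 };
	struct toy_range new_r = { .vram_pages = 8, .actual_loc = 0 };

	toy_split_account(&new_r, &old_r);
	printf("new: vram_pages=%" PRIu64 " loc=%u, old: vram_pages=%" PRIu64 " loc=%u\n",
	       new_r.vram_pages, new_r.actual_loc, old_r.vram_pages, old_r.actual_loc);
	/* prints: new: vram_pages=8 loc=1, old: vram_pages=0 loc=0 */
	return 0;
}

The ordering matters: the new range inherits old->actual_loc before the old
range may clear it, which is the same order the svm_range_split_pages() hunk
above uses.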