On 2024-06-21 13:28, Xiaogang.Chen
wrote:
From: Xiaogang Chen <xiaogang.chen@xxxxxxx> When user adds new vm range that has overlapping with existing svm pranges current kfd clones new prange and remove existing pranges including all data associate with it. It is not necessary. We can handle the overlapping on existing pranges directly that would simplify kfd code. And, when remove a existing prange the locks from it will get destroyed. This may cause issue if code still use these locks. And locks from cloned prange do not inherit context of locks that got removed. This patch does not remove existing pranges or clone new pranges, keeps locks of pranges alive. Signed-off-by: Xiaogang Chen<Xiaogang.Chen@xxxxxxx> --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 89 ++++------------------------ 1 file changed, 12 insertions(+), 77 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 407636a68814..a8fcace6f9a2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -904,23 +904,6 @@ svm_range_copy_array(void *psrc, size_t size, uint64_t num_elements, return (void *)dst; } -static int -svm_range_copy_dma_addrs(struct svm_range *dst, struct svm_range *src) -{ - int i; - - for (i = 0; i < MAX_GPU_INSTANCE; i++) { - if (!src->dma_addr[i]) - continue; - dst->dma_addr[i] = svm_range_copy_array(src->dma_addr[i], - sizeof(*src->dma_addr[i]), src->npages, 0, NULL); - if (!dst->dma_addr[i]) - return -ENOMEM; - } - - return 0; -} - static int svm_range_split_array(void *ppnew, void *ppold, size_t size, uint64_t old_start, uint64_t old_n, @@ -1967,38 +1950,6 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm, return r; } -static struct svm_range *svm_range_clone(struct svm_range *old) -{ - struct svm_range *new; - - new = svm_range_new(old->svms, old->start, old->last, false); - if (!new) - return NULL; - if (svm_range_copy_dma_addrs(new, old)) { - svm_range_free(new, false); - return NULL; - } - if (old->svm_bo) { - 
new->ttm_res = old->ttm_res; - new->offset = old->offset; - new->svm_bo = svm_range_bo_ref(old->svm_bo); - spin_lock(&new->svm_bo->list_lock); - list_add(&new->svm_bo_list, &new->svm_bo->range_list); - spin_unlock(&new->svm_bo->list_lock); - } - new->flags = old->flags; - new->preferred_loc = old->preferred_loc; - new->prefetch_loc = old->prefetch_loc; - new->actual_loc = old->actual_loc; - new->granularity = old->granularity; - new->mapped_to_gpu = old->mapped_to_gpu; - new->vram_pages = old->vram_pages; - bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE); - bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE); - - return new; -} - void svm_range_set_max_pages(struct amdgpu_device *adev) { uint64_t max_pages; @@ -2057,7 +2008,6 @@ svm_range_split_new(struct svm_range_list *svms, uint64_t start, uint64_t last, * @attrs: array of attributes * @update_list: output, the ranges need validate and update GPU mapping * @insert_list: output, the ranges need insert to svms - * @remove_list: output, the ranges are replaced and need remove from svms * @remap_list: output, remap unaligned svm ranges * * Check if the virtual address range has overlap with any existing ranges, @@ -2082,7 +2032,7 @@ static int svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size, uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs, struct list_head *update_list, struct list_head *insert_list, - struct list_head *remove_list, struct list_head *remap_list) + struct list_head *remap_list) { unsigned long last = start + size - 1UL; struct svm_range_list *svms = &p->svms; @@ -2096,7 +2046,6 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size, INIT_LIST_HEAD(update_list); INIT_LIST_HEAD(insert_list); - INIT_LIST_HEAD(remove_list); INIT_LIST_HEAD(&new_list); INIT_LIST_HEAD(remap_list); @@ -2117,20 +2066,11 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size, /* nothing to do */ } else if (node->start < start || node->last 
> last) { /* node intersects the update range and its attributes - * will change. Clone and split it, apply updates only + * will change. Split it, apply updates only * to the overlapping part */ - struct svm_range *old = prange; - - prange = svm_range_clone(old); - if (!prange) { - r = -ENOMEM; - goto out; - } - - list_add(&old->update_list, remove_list); - list_add(&prange->list, insert_list); - list_add(&prange->update_list, update_list); + list_move_tail(&prange->list, insert_list); + list_move_tail(&prange->update_list, update_list);
The main purpose of cloning the prange is to allow rollback on error. If you remove the original prange from svms and put it on insert_list, how do you roll back and restore the prange to svms when splitting the prange fails?
We hold the svms lock while accessing pranges, so it is impossible to access a prange after it has been removed from svms.
Regards,
Philip
if (node->start < start) { pr_debug("change old range start\n"); @@ -3533,7 +3473,6 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm, struct amdkfd_process_info *process_info = p->kgd_process_info; struct list_head update_list; struct list_head insert_list; - struct list_head remove_list; struct list_head remap_list; struct svm_range_list *svms; struct svm_range *prange; @@ -3563,10 +3502,9 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm, } mutex_lock(&svms->lock); - /* Add new range and split existing ranges as needed */ r = svm_range_add(p, start, size, nattr, attrs, &update_list, - &insert_list, &remove_list, &remap_list); + &insert_list, &remap_list); if (r) { mutex_unlock(&svms->lock); mmap_write_unlock(mm); @@ -3574,21 +3512,18 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm, } /* Apply changes as a transaction */ list_for_each_entry_safe(prange, next, &insert_list, list) { - svm_range_add_to_svms(prange); - svm_range_add_notifier_locked(mm, prange); + /* prange can be new or old range, put it at svms->list */ + list_move_tail(&prange->list, &prange->svms->list); + /* update prange at interval trees: svms->objects and + * mm interval notifier tree + */ + svm_range_update_notifier_and_interval_tree(mm, prange); } + list_for_each_entry(prange, &update_list, update_list) { svm_range_apply_attrs(p, prange, nattr, attrs, &update_mapping); /* TODO: unmap ranges from GPU that lost access */ } - list_for_each_entry_safe(prange, next, &remove_list, update_list) { - pr_debug("unlink old 0x%p prange 0x%p [0x%lx 0x%lx]\n", - prange->svms, prange, prange->start, - prange->last); - svm_range_unlink(prange); - svm_range_remove_notifier(prange); - svm_range_free(prange, false); - } mmap_write_downgrade(mm); /* Trigger migrations and revalidate and map to GPUs as needed. If