From: Alex Sierra <alex.sierra@xxxxxxx> The svm_bo eviction mechanism is different from that of regular BOs. Every SVM_BO created contains one eviction fence and one work item for the eviction process. SVM_BOs can be attached to one or more pranges. To evict SVM_BOs, TTM will call the enable_signal callback for every SVM_BO until VRAM space is available. Here, all the ttm_evict calls are synchronous; this guarantees that each eviction has completed and the fence has signaled before the call returns. Signed-off-by: Alex Sierra <alex.sierra@xxxxxxx> Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx> Signed-off-by: Felix Kuehling <Felix.Kuehling@xxxxxxx> --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 197 ++++++++++++++++++++------- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 13 +- 2 files changed, 160 insertions(+), 50 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 7346255f7c27..63b745a06740 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -34,6 +34,7 @@ #define AMDGPU_SVM_RANGE_RESTORE_DELAY_MS 1 +static void svm_range_evict_svm_bo_worker(struct work_struct *work); /** * svm_range_unlink - unlink svm_range from lists and interval tree * @prange: svm range structure to be removed @@ -260,7 +261,15 @@ static void svm_range_bo_release(struct kref *kref) list_del_init(&prange->svm_bo_list); } spin_unlock(&svm_bo->list_lock); - + if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base)) { + /* We're not in the eviction worker. + * Signal the fence and synchronize with any + * pending eviction work. 
+ */ + dma_fence_signal(&svm_bo->eviction_fence->base); + cancel_work_sync(&svm_bo->eviction_work); + } + dma_fence_put(&svm_bo->eviction_fence->base); amdgpu_bo_unref(&svm_bo->bo); kfree(svm_bo); } @@ -273,6 +282,62 @@ static void svm_range_bo_unref(struct svm_range_bo *svm_bo) kref_put(&svm_bo->kref, svm_range_bo_release); } +static bool svm_range_validate_svm_bo(struct svm_range *prange) +{ + spin_lock(&prange->svm_bo_lock); + if (!prange->svm_bo) { + spin_unlock(&prange->svm_bo_lock); + return false; + } + if (prange->mm_nodes) { + /* We still have a reference, all is well */ + spin_unlock(&prange->svm_bo_lock); + return true; + } + if (svm_bo_ref_unless_zero(prange->svm_bo)) { + if (READ_ONCE(prange->svm_bo->evicting)) { + struct dma_fence *f; + struct svm_range_bo *svm_bo; + /* The BO is getting evicted, + * we need to get a new one + */ + spin_unlock(&prange->svm_bo_lock); + svm_bo = prange->svm_bo; + f = dma_fence_get(&svm_bo->eviction_fence->base); + svm_range_bo_unref(prange->svm_bo); + /* wait for the fence to avoid long spin-loop + * at list_empty_careful + */ + dma_fence_wait(f, false); + dma_fence_put(f); + } else { + /* The BO was still around and we got + * a new reference to it + */ + spin_unlock(&prange->svm_bo_lock); + pr_debug("reuse old bo svms 0x%p [0x%lx 0x%lx]\n", + prange->svms, prange->it_node.start, + prange->it_node.last); + + prange->mm_nodes = prange->svm_bo->bo->tbo.mem.mm_node; + return true; + } + + } else { + spin_unlock(&prange->svm_bo_lock); + } + + /* We need a new svm_bo. Spin-loop to wait for concurrent + * svm_range_bo_release to finish removing this range from + * its range list. After this, it is safe to reuse the + * svm_bo pointer and svm_bo_list head. 
+ */ + while (!list_empty_careful(&prange->svm_bo_list)) + ; + + return false; +} + static struct svm_range_bo *svm_range_bo_new(void) { struct svm_range_bo *svm_bo; @@ -292,71 +357,54 @@ int svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange, bool clear) { - struct amdkfd_process_info *process_info; struct amdgpu_bo_param bp; struct svm_range_bo *svm_bo; struct amdgpu_bo *bo; struct kfd_process *p; + struct mm_struct *mm; int r; - pr_debug("[0x%lx 0x%lx]\n", prange->it_node.start, - prange->it_node.last); - spin_lock(&prange->svm_bo_lock); - if (prange->svm_bo) { - if (prange->mm_nodes) { - /* We still have a reference, all is well */ - spin_unlock(&prange->svm_bo_lock); - return 0; - } - if (svm_bo_ref_unless_zero(prange->svm_bo)) { - /* The BO was still around and we got - * a new reference to it - */ - spin_unlock(&prange->svm_bo_lock); - pr_debug("reuse old bo [0x%lx 0x%lx]\n", - prange->it_node.start, prange->it_node.last); - - prange->mm_nodes = prange->svm_bo->bo->tbo.mem.mm_node; - return 0; - } - - spin_unlock(&prange->svm_bo_lock); - - /* We need a new svm_bo. Spin-loop to wait for concurrent - * svm_range_bo_release to finish removing this range from - * its range list. After this, it is safe to reuse the - * svm_bo pointer and svm_bo_list head. 
- */ - while (!list_empty_careful(&prange->svm_bo_list)) - ; + p = container_of(prange->svms, struct kfd_process, svms); + pr_debug("pasid: %x svms 0x%p [0x%lx 0x%lx]\n", p->pasid, prange->svms, + prange->it_node.start, prange->it_node.last); - } else { - spin_unlock(&prange->svm_bo_lock); - } + if (svm_range_validate_svm_bo(prange)) + return 0; svm_bo = svm_range_bo_new(); if (!svm_bo) { pr_debug("failed to alloc svm bo\n"); return -ENOMEM; } - + mm = get_task_mm(p->lead_thread); + if (!mm) { + pr_debug("failed to get mm\n"); + kfree(svm_bo); + return -ESRCH; + } + svm_bo->svms = prange->svms; + svm_bo->eviction_fence = + amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1), + mm, + svm_bo); + mmput(mm); + INIT_WORK(&svm_bo->eviction_work, svm_range_evict_svm_bo_worker); + svm_bo->evicting = 0; memset(&bp, 0, sizeof(bp)); bp.size = prange->npages * PAGE_SIZE; bp.byte_align = PAGE_SIZE; bp.domain = AMDGPU_GEM_DOMAIN_VRAM; bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS; bp.flags |= clear ? AMDGPU_GEM_CREATE_VRAM_CLEARED : 0; + bp.flags |= AMDGPU_AMDKFD_CREATE_SVM_BO; bp.type = ttm_bo_type_device; bp.resv = NULL; r = amdgpu_bo_create(adev, &bp, &bo); if (r) { pr_debug("failed %d to create bo\n", r); - kfree(svm_bo); - return r; + goto create_bo_failed; } - - p = container_of(prange->svms, struct kfd_process, svms); r = amdgpu_bo_reserve(bo, true); if (r) { pr_debug("failed %d to reserve bo\n", r); @@ -369,8 +417,7 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange, amdgpu_bo_unreserve(bo); goto reserve_bo_failed; } - process_info = p->kgd_process_info; - amdgpu_bo_fence(bo, &process_info->eviction_fence->base, true); + amdgpu_bo_fence(bo, &svm_bo->eviction_fence->base, true); amdgpu_bo_unreserve(bo); @@ -380,14 +427,16 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange, prange->offset = 0; spin_lock(&svm_bo->list_lock); - list_add(&prange->svm_bo_list, &svm_bo->range_list); + list_add_rcu(&prange->svm_bo_list, 
&svm_bo->range_list); spin_unlock(&svm_bo->list_lock); return 0; reserve_bo_failed: - kfree(svm_bo); amdgpu_bo_unref(&bo); +create_bo_failed: + dma_fence_put(&svm_bo->eviction_fence->base); + kfree(svm_bo); prange->mm_nodes = NULL; return r; @@ -621,7 +670,7 @@ svm_range_split_nodes(struct svm_range *new, struct svm_range *old, new->mm_nodes = old->mm_nodes; spin_lock(&new->svm_bo->list_lock); - list_add(&new->svm_bo_list, &new->svm_bo->range_list); + list_add_rcu(&new->svm_bo_list, &new->svm_bo->range_list); spin_unlock(&new->svm_bo->list_lock); return 0; @@ -1353,7 +1402,7 @@ struct svm_range *svm_range_clone(struct svm_range *old) new->offset = old->offset; new->svm_bo = svm_range_bo_ref(old->svm_bo); spin_lock(&new->svm_bo->list_lock); - list_add(&new->svm_bo_list, &new->svm_bo->range_list); + list_add_rcu(&new->svm_bo_list, &new->svm_bo->range_list); spin_unlock(&new->svm_bo->list_lock); } new->flags = old->flags; @@ -1964,6 +2013,62 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange, return r; } +int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence) +{ + if (!fence) + return -EINVAL; + + if (dma_fence_is_signaled(&fence->base)) + return 0; + + if (fence->svm_bo) { + WRITE_ONCE(fence->svm_bo->evicting, 1); + schedule_work(&fence->svm_bo->eviction_work); + } + + return 0; +} + +static void svm_range_evict_svm_bo_worker(struct work_struct *work) +{ + struct svm_range_bo *svm_bo; + struct svm_range *prange; + struct kfd_process *p; + struct mm_struct *mm; + int srcu_idx; + + svm_bo = container_of(work, struct svm_range_bo, eviction_work); + if (!svm_bo_ref_unless_zero(svm_bo)) + return; /* svm_bo was freed while eviction was pending */ + + /* svm_range_bo_release destroys this worker thread. So during + * the lifetime of this thread, kfd_process and mm will be valid. 
+ */ + p = container_of(svm_bo->svms, struct kfd_process, svms); + mm = p->mm; + if (!mm) + return; + + mmap_read_lock(mm); + srcu_idx = srcu_read_lock(&svm_bo->svms->srcu); + list_for_each_entry_rcu(prange, &svm_bo->range_list, svm_bo_list) { + pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, + prange->it_node.start, prange->it_node.last); + mutex_lock(&prange->mutex); + svm_migrate_vram_to_ram(prange, svm_bo->eviction_fence->mm); + mutex_unlock(&prange->mutex); + } + srcu_read_unlock(&svm_bo->svms->srcu, srcu_idx); + mmap_read_unlock(mm); + + dma_fence_signal(&svm_bo->eviction_fence->base); + /* This is the last reference to svm_bo, after svm_range_vram_node_free + * has been called in svm_migrate_vram_to_ram + */ + WARN_ONCE(kref_read(&svm_bo->kref) != 1, "This was not the last reference\n"); + svm_range_bo_unref(svm_bo); +} + static int svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size, uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index e546f36ef709..143573621956 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -33,10 +33,14 @@ #include "kfd_priv.h" struct svm_range_bo { - struct amdgpu_bo *bo; - struct kref kref; - struct list_head range_list; /* all svm ranges shared this bo */ - spinlock_t list_lock; + struct amdgpu_bo *bo; + struct kref kref; + struct list_head range_list; /* all svm ranges shared this bo */ + spinlock_t list_lock; + struct amdgpu_amdkfd_fence *eviction_fence; + struct work_struct eviction_work; + struct svm_range_list *svms; + uint32_t evicting; }; /** * struct svm_range - shared virtual memory range @@ -123,5 +127,6 @@ int svm_range_split_by_granularity(struct kfd_process *p, unsigned long addr, struct list_head *list); int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, uint64_t addr); +int svm_range_schedule_evict_svm_bo(struct 
amdgpu_amdkfd_fence *fence); #endif /* KFD_SVM_H_ */ -- 2.29.2 _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/dri-devel