Comments inline. [xh]

> On Feb 10, 2020, at 23:09, Christian König <ckoenig.leichtzumerken@xxxxxxxxx> wrote:
> 
> This patch reworks the whole delayed deletion of BOs which aren't idle.
> 
> Instead of having two counters for the BO structure we resurrect the BO
> when we find that a deleted BO is not idle yet.
> 
> This has many advantages, especially that we don't need to
> increment/decrement the BOs reference counter any more when it
> moves on the LRUs.
> 
> Signed-off-by: Christian König <christian.koenig@xxxxxxx>
> ---
> drivers/gpu/drm/ttm/ttm_bo.c      | 217 +++++++++++++-----------------
> drivers/gpu/drm/ttm/ttm_bo_util.c |   1 -
> include/drm/ttm/ttm_bo_api.h      |  11 +-
> 3 files changed, 97 insertions(+), 132 deletions(-)
> 
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index e12fc2c2d165..d0624685f5d2 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -145,26 +145,6 @@ static inline uint32_t ttm_bo_type_flags(unsigned type)
> 	return 1 << (type);
> }
> 
> -static void ttm_bo_release_list(struct kref *list_kref)
> -{
> -	struct ttm_buffer_object *bo =
> -		container_of(list_kref, struct ttm_buffer_object, list_kref);
> -	size_t acc_size = bo->acc_size;
> -
> -	BUG_ON(kref_read(&bo->list_kref));
> -	BUG_ON(kref_read(&bo->kref));
> -	BUG_ON(bo->mem.mm_node != NULL);
> -	BUG_ON(!list_empty(&bo->lru));
> -	BUG_ON(!list_empty(&bo->ddestroy));
> -	ttm_tt_destroy(bo->ttm);
> -	atomic_dec(&ttm_bo_glob.bo_count);
> -	dma_fence_put(bo->moving);
> -	if (!ttm_bo_uses_embedded_gem_object(bo))
> -		dma_resv_fini(&bo->base._resv);
> -	bo->destroy(bo);
> -	ttm_mem_global_free(&ttm_mem_glob, acc_size);
> -}
> -
> static void ttm_bo_add_mem_to_lru(struct ttm_buffer_object *bo,
> 				  struct ttm_mem_reg *mem)
> {
> @@ -181,21 +161,14 @@ static void ttm_bo_add_mem_to_lru(struct ttm_buffer_object *bo,
> 
> 	man = &bdev->man[mem->mem_type];
> 	list_add_tail(&bo->lru, &man->lru[bo->priority]);
> -	kref_get(&bo->list_kref);
> 
> 	if (!(man->flags & TTM_MEMTYPE_FLAG_FIXED) && bo->ttm &&
> 	    !(bo->ttm->page_flags & (TTM_PAGE_FLAG_SG |
> 				     TTM_PAGE_FLAG_SWAPPED))) {
> 		list_add_tail(&bo->swap, &ttm_bo_glob.swap_lru[bo->priority]);
> -		kref_get(&bo->list_kref);
> 	}
> }
> 
> -static void ttm_bo_ref_bug(struct kref *list_kref)
> -{
> -	BUG();
> -}
> -
> static void ttm_bo_del_from_lru(struct ttm_buffer_object *bo)
> {
> 	struct ttm_bo_device *bdev = bo->bdev;
> @@ -203,12 +176,10 @@ static void ttm_bo_del_from_lru(struct ttm_buffer_object *bo)
> 
> 	if (!list_empty(&bo->swap)) {
> 		list_del_init(&bo->swap);
> -		kref_put(&bo->list_kref, ttm_bo_ref_bug);
> 		notify = true;
> 	}
> 	if (!list_empty(&bo->lru)) {
> 		list_del_init(&bo->lru);
> -		kref_put(&bo->list_kref, ttm_bo_ref_bug);
> 		notify = true;
> 	}
> 
> @@ -421,8 +392,7 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
> 	BUG_ON(!dma_resv_trylock(&bo->base._resv));
> 
> 	r = dma_resv_copy_fences(&bo->base._resv, bo->base.resv);
> -	if (r)
> -		dma_resv_unlock(&bo->base._resv);
> +	dma_resv_unlock(&bo->base._resv);
> 
> 	return r;
> }
> @@ -449,68 +419,10 @@ static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo)
> 	rcu_read_unlock();
> }
> 
> -static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
> -{
> -	struct ttm_bo_device *bdev = bo->bdev;
> -	int ret;
> -
> -	ret = ttm_bo_individualize_resv(bo);
> -	if (ret) {
> -		/* Last resort, if we fail to allocate memory for the
> -		 * fences block for the BO to become idle
> -		 */
> -		dma_resv_wait_timeout_rcu(bo->base.resv, true, false,
> -					  30 * HZ);
> -		spin_lock(&ttm_bo_glob.lru_lock);
> -		goto error;
> -	}
> -
> -	spin_lock(&ttm_bo_glob.lru_lock);
> -	ret = dma_resv_trylock(bo->base.resv) ? 0 : -EBUSY;
> -	if (!ret) {
> -		if (dma_resv_test_signaled_rcu(&bo->base._resv, true)) {
> -			ttm_bo_del_from_lru(bo);
> -			spin_unlock(&ttm_bo_glob.lru_lock);
> -			if (bo->base.resv != &bo->base._resv)
> -				dma_resv_unlock(&bo->base._resv);
> -
> -			ttm_bo_cleanup_memtype_use(bo);
> -			dma_resv_unlock(bo->base.resv);
> -			return;
> -		}
> -
> -		ttm_bo_flush_all_fences(bo);
> -
> -		/*
> -		 * Make NO_EVICT bos immediately available to
> -		 * shrinkers, now that they are queued for
> -		 * destruction.
> -		 */
> -		if (bo->mem.placement & TTM_PL_FLAG_NO_EVICT) {
> -			bo->mem.placement &= ~TTM_PL_FLAG_NO_EVICT;
> -			ttm_bo_move_to_lru_tail(bo, NULL);
> -		}
> -
> -		dma_resv_unlock(bo->base.resv);
> -	}
> -	if (bo->base.resv != &bo->base._resv) {
> -		ttm_bo_flush_all_fences(bo);
> -		dma_resv_unlock(&bo->base._resv);
> -	}
> -
> -error:
> -	kref_get(&bo->list_kref);
> -	list_add_tail(&bo->ddestroy, &bdev->ddestroy);
> -	spin_unlock(&ttm_bo_glob.lru_lock);
> -
> -	schedule_delayed_work(&bdev->wq,
> -			      ((HZ / 100) < 1) ? 1 : HZ / 100);
> -}
> -
> /**
>  * function ttm_bo_cleanup_refs
> - * If bo idle, remove from delayed- and lru lists, and unref.
> - * If not idle, do nothing.
> + * If bo idle, remove from lru lists, and unref.
> + * If not idle, block if possible.
>  *
>  * Must be called with lru_lock and reservation held, this function
>  * will drop the lru lock and optionally the reservation lock before returning.
> @@ -572,14 +484,14 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> 
> 	ttm_bo_del_from_lru(bo);
> 	list_del_init(&bo->ddestroy);
> -	kref_put(&bo->list_kref, ttm_bo_ref_bug);
> -
> 	spin_unlock(&ttm_bo_glob.lru_lock);
> 	ttm_bo_cleanup_memtype_use(bo);
> 
> 	if (unlock_resv)
> 		dma_resv_unlock(bo->base.resv);
> 
> +	ttm_bo_put(bo);
> +
> 	return 0;
> }
> 
> @@ -601,8 +513,9 @@ static bool ttm_bo_delayed_delete(struct ttm_bo_device *bdev, bool remove_all)
> 
> 		bo = list_first_entry(&bdev->ddestroy, struct ttm_buffer_object,
> 				      ddestroy);
> -		kref_get(&bo->list_kref);
> 		list_move_tail(&bo->ddestroy, &removed);
> +		if (!ttm_bo_get_unless_zero(bo))
> +			continue;
> 
> 		if (remove_all || bo->base.resv != &bo->base._resv) {
> 			spin_unlock(&glob->lru_lock);
> @@ -617,7 +530,7 @@ static bool ttm_bo_delayed_delete(struct ttm_bo_device *bdev, bool remove_all)
> 			spin_unlock(&glob->lru_lock);
> 		}
> 
> -		kref_put(&bo->list_kref, ttm_bo_release_list);
> +		ttm_bo_put(bo);
> 		spin_lock(&glob->lru_lock);
> 	}
> 	list_splice_tail(&removed, &bdev->ddestroy);
> @@ -643,16 +556,68 @@ static void ttm_bo_release(struct kref *kref)
> 		container_of(kref, struct ttm_buffer_object, kref);
> 	struct ttm_bo_device *bdev = bo->bdev;
> 	struct ttm_mem_type_manager *man = &bdev->man[bo->mem.mem_type];
> +	size_t acc_size = bo->acc_size;
> +	int ret;
> 
> -	if (bo->bdev->driver->release_notify)
> -		bo->bdev->driver->release_notify(bo);
> +	if (!bo->deleted) {
> +		if (bo->bdev->driver->release_notify)
> +			bo->bdev->driver->release_notify(bo);
> 
> -	drm_vma_offset_remove(bdev->vma_manager, &bo->base.vma_node);
> -	ttm_mem_io_lock(man, false);
> -	ttm_mem_io_free_vm(bo);
> -	ttm_mem_io_unlock(man);
> -	ttm_bo_cleanup_refs_or_queue(bo);
> -	kref_put(&bo->list_kref, ttm_bo_release_list);
> +		drm_vma_offset_remove(bdev->vma_manager, &bo->base.vma_node);
> +		ttm_mem_io_lock(man, false);
> +		ttm_mem_io_free_vm(bo);
> +		ttm_mem_io_unlock(man);
> +
> +		ret = ttm_bo_individualize_resv(bo);
> +		if (ret) {
> +			/* Last resort, if we fail to allocate memory for the
> +			 * fences block for the BO to become idle
> +			 */
> +			dma_resv_wait_timeout_rcu(bo->base.resv, true, false,
> +						  30 * HZ);
> +		}
> +	}
> +
> +	if (!dma_resv_test_signaled_rcu(bo->base.resv, true)) {
> +		/* The BO is not idle, resurrect it for delayed destroy */
> +		ttm_bo_flush_all_fences(bo);
> +		bo->deleted = true;
> +
> +		/*
> +		 * Make NO_EVICT bos immediately available to
> +		 * shrinkers, now that they are queued for
> +		 * destruction.
> +		 */
> +		if (bo->mem.placement & TTM_PL_FLAG_NO_EVICT) {
> +			bo->mem.placement &= ~TTM_PL_FLAG_NO_EVICT;
> +			ttm_bo_move_to_lru_tail(bo, NULL);

[xh] This should be done under the lru lock.
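Something along these lines is what I have in mind (untested, just to
illustrate; it only reorders what is already in the patch): move the whole
NO_EVICT handling down so it runs together with the ddestroy list add while
ttm_bo_glob.lru_lock is held:

	spin_lock(&ttm_bo_glob.lru_lock);

	/*
	 * Make NO_EVICT bos immediately available to shrinkers, now
	 * that they are queued for destruction.
	 */
	if (bo->mem.placement & TTM_PL_FLAG_NO_EVICT) {
		bo->mem.placement &= ~TTM_PL_FLAG_NO_EVICT;
		ttm_bo_move_to_lru_tail(bo, NULL);
	}

	kref_init(&bo->kref);
	list_add_tail(&bo->ddestroy, &bdev->ddestroy);
	spin_unlock(&ttm_bo_glob.lru_lock);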
> +		}
> +
> +		spin_lock(&ttm_bo_glob.lru_lock);
> +		kref_init(&bo->kref);
> +		list_add_tail(&bo->ddestroy, &bdev->ddestroy);
> +		spin_unlock(&ttm_bo_glob.lru_lock);
> +
> +		schedule_delayed_work(&bdev->wq,
> +				      ((HZ / 100) < 1) ? 1 : HZ / 100);
> +		return;
> +	}
> +
> +	spin_lock(&ttm_bo_glob.lru_lock);
> +	ttm_bo_del_from_lru(bo);
> +	list_del(&bo->ddestroy);
> +	spin_unlock(&ttm_bo_glob.lru_lock);
> +
> +	ttm_bo_cleanup_memtype_use(bo);
> +
> +	BUG_ON(bo->mem.mm_node != NULL);
> +	ttm_tt_destroy(bo->ttm);

[xh] We already destroy it in ttm_bo_cleanup_memtype_use() just above.
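If I read ttm_bo_cleanup_memtype_use() correctly it already calls
ttm_tt_destroy(bo->ttm) and clears bo->ttm, so the extra call here looks
redundant and could probably just be dropped. The tail of the idle path would
then become something like this (untested sketch):

	ttm_bo_cleanup_memtype_use(bo);	/* should already take care of bo->ttm */

	BUG_ON(bo->mem.mm_node != NULL);
	atomic_dec(&ttm_bo_glob.bo_count);
	dma_fence_put(bo->moving);
	if (!ttm_bo_uses_embedded_gem_object(bo))
		dma_resv_fini(&bo->base._resv);
	bo->destroy(bo);
	ttm_mem_global_free(&ttm_mem_glob, acc_size);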
> +	atomic_dec(&ttm_bo_glob.bo_count);
> +	dma_fence_put(bo->moving);
> +	if (!ttm_bo_uses_embedded_gem_object(bo))
> +		dma_resv_fini(&bo->base._resv);
> +	bo->destroy(bo);
> +	ttm_mem_global_free(&ttm_mem_glob, acc_size);
> }
> 
> void ttm_bo_put(struct ttm_buffer_object *bo)
> @@ -755,8 +720,7 @@ static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
> 
> 	if (bo->base.resv == ctx->resv) {
> 		dma_resv_assert_held(bo->base.resv);
> -		if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT
> -		    || !list_empty(&bo->ddestroy))
> +		if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT || bo->deleted)
> 			ret = true;
> 		*locked = false;
> 		if (busy)
> @@ -837,6 +801,11 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
> 				dma_resv_unlock(bo->base.resv);
> 				continue;
> 			}
> +			if (!ttm_bo_get_unless_zero(bo)) {
> +				if (locked)
> +					dma_resv_unlock(bo->base.resv);
> +				continue;
> +			}
> 			break;
> 		}
> 
> @@ -848,21 +817,19 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
> 	}
> 
> 	if (!bo) {
> -		if (busy_bo)
> -			kref_get(&busy_bo->list_kref);
> +		if (busy_bo && !ttm_bo_get_unless_zero(busy_bo))
> +			busy_bo = NULL;
> 		spin_unlock(&ttm_bo_glob.lru_lock);
> 		ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket);
> 		if (busy_bo)
> -			kref_put(&busy_bo->list_kref, ttm_bo_release_list);
> +			ttm_bo_put(busy_bo);
> 		return ret;
> 	}
> 
> -	kref_get(&bo->list_kref);
> -
> -	if (!list_empty(&bo->ddestroy)) {
> +	if (bo->deleted) {
> 		ret = ttm_bo_cleanup_refs(bo, ctx->interruptible,
> 					  ctx->no_wait_gpu, locked);
> -		kref_put(&bo->list_kref, ttm_bo_release_list);
> +		ttm_bo_put(bo);
> 		return ret;
> 	}
> 
> @@ -872,7 +839,7 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
> 	if (locked)
> 		ttm_bo_unreserve(bo);
> 
> -	kref_put(&bo->list_kref, ttm_bo_release_list);
> +	ttm_bo_put(bo);
> 	return ret;
> }
> 
> @@ -1284,7 +1251,6 @@ int ttm_bo_init_reserved(struct ttm_bo_device *bdev,
> 	bo->destroy = destroy ? destroy : ttm_bo_default_destroy;
> 
> 	kref_init(&bo->kref);
> -	kref_init(&bo->list_kref);
> 	INIT_LIST_HEAD(&bo->lru);
> 	INIT_LIST_HEAD(&bo->ddestroy);
> 	INIT_LIST_HEAD(&bo->swap);
> @@ -1804,11 +1770,18 @@ int ttm_bo_swapout(struct ttm_bo_global *glob, struct ttm_operation_ctx *ctx)
> 	spin_lock(&glob->lru_lock);
> 	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> 		list_for_each_entry(bo, &glob->swap_lru[i], swap) {
> -			if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
> -							   NULL)) {
> -				ret = 0;
> -				break;
> +			if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
> +							    NULL))
> +				continue;
> +
> +			if (!ttm_bo_get_unless_zero(bo)) {
> +				if (locked)
> +					dma_resv_unlock(bo->base.resv);
> +				continue;
> 			}

[xh] You take a reference on the bo here, but where is the matching put? Only
one bo is put just before the return, while references are taken on bos inside
the for loop. Am I missing something?

thanks
xinhui

> +
> +			ret = 0;
> +			break;
> 		}
> 		if (!ret)
> 			break;
> @@ -1819,11 +1792,9 @@ int ttm_bo_swapout(struct ttm_bo_global *glob, struct ttm_operation_ctx *ctx)
> 		return ret;
> 	}
> 
> -	kref_get(&bo->list_kref);
> -
> -	if (!list_empty(&bo->ddestroy)) {
> +	if (bo->deleted) {
> 		ret = ttm_bo_cleanup_refs(bo, false, false, locked);
> -		kref_put(&bo->list_kref, ttm_bo_release_list);
> +		ttm_bo_put(bo);
> 		return ret;
> 	}
> 
> @@ -1877,7 +1848,7 @@ int ttm_bo_swapout(struct ttm_bo_global *glob, struct ttm_operation_ctx *ctx)
> 	 */
> 	if (locked)
> 		dma_resv_unlock(bo->base.resv);
> -	kref_put(&bo->list_kref, ttm_bo_release_list);
> +	ttm_bo_put(bo);
> 	return ret;
> }
> EXPORT_SYMBOL(ttm_bo_swapout);
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
> index 86d152472f38..c8e359ded1df 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
> @@ -507,7 +507,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
> 	fbo->base.moving = NULL;
> 	drm_vma_node_reset(&fbo->base.base.vma_node);
> 
> -	kref_init(&fbo->base.list_kref);
> 	kref_init(&fbo->base.kref);
> 	fbo->base.destroy = &ttm_transfered_destroy;
> 	fbo->base.acc_size = 0;
> diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
> index 66ca49db9633..b9bc1b00142e 100644
> --- a/include/drm/ttm/ttm_bo_api.h
> +++ b/include/drm/ttm/ttm_bo_api.h
> @@ -135,18 +135,14 @@ struct ttm_tt;
>  * @num_pages: Actual number of pages.
>  * @acc_size: Accounted size for this object.
>  * @kref: Reference count of this buffer object. When this refcount reaches
> - * zero, the object is put on the delayed delete list.
> - * @list_kref: List reference count of this buffer object. This member is
> - * used to avoid destruction while the buffer object is still on a list.
> - * Lru lists may keep one refcount, the delayed delete list, and kref != 0
> - * keeps one refcount. When this refcount reaches zero,
> - * the object is destroyed.
> + * zero, the object is destroyed or put on the delayed delete list.
>  * @mem: structure describing current placement.
>  * @persistent_swap_storage: Usually the swap storage is deleted for buffers
>  * pinned in physical memory. If this behaviour is not desired, this member
>  * holds a pointer to a persistent shmem object.
>  * @ttm: TTM structure holding system pages.
>  * @evicted: Whether the object was evicted without user-space knowing.
> + * @deleted: True if the object is only a zombie and already deleted.
>  * @lru: List head for the lru list.
>  * @ddestroy: List head for the delayed destroy list.
>  * @swap: List head for swap LRU list.
> @@ -183,9 +179,7 @@ struct ttm_buffer_object {
> 	/**
> 	 * Members not needing protection.
> 	 */
> -
> 	struct kref kref;
> -	struct kref list_kref;
> 
> 	/**
> 	 * Members protected by the bo::resv::reserved lock.
> @@ -195,6 +189,7 @@ struct ttm_buffer_object {
> 	struct file *persistent_swap_storage;
> 	struct ttm_tt *ttm;
> 	bool evicted;
> +	bool deleted;
> 
> 	/**
> 	 * Members protected by the bdev::lru_lock.
> -- 
> 2.17.1
> 
_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx