On Wed, May 15, 2019 at 10:38:28AM +0200, Daniel Vetter wrote:
> On Tue, May 14, 2019 at 02:31:18PM +0200, Christian König wrote:
> > From: Chunming Zhou <david1.zhou@xxxxxxx>
> >
> > heavy gpu job could occupy memory long time, which lead other user
> > fail to get memory.
> >
> > basically pick up Christian idea:
> >
> > 1. Reserve the BO in DC using a ww_mutex ticket (trivial).
> > 2. If we then run into this EBUSY condition in TTM check if the BO we
> >    need memory for (or rather the ww_mutex of its reservation object)
> >    has a ticket assigned.
> > 3. If we have a ticket we grab a reference to the first BO on the LRU,
> >    drop the LRU lock and try to grab the reservation lock with the
> >    ticket.
> > 4. If getting the reservation lock with the ticket succeeded we check
> >    if the BO is still the first one on the LRU in question (the BO
> >    could have moved).
> > 5. If the BO is still the first one on the LRU in question we try to
> >    evict it as we would evict any other BO.
> > 6. If any of the "If's" above fail we just back off and return -EBUSY.
> >
> > v2: fix some minor check
> > v3: address Christian v2 comments.
> > v4: fix some missing
> > v5: handle first_bo unlock and bo_get/put
> > v6: abstract unified iterate function, and handle all possible usecase
> >     not only pinned bo.
> > v7: pass request bo->resv to ttm_bo_evict_first
> > v8 (chk): minimal coding style fix
> >
> > Change-Id: I21423fb922f885465f13833c41df1e134364a8e7
> > Signed-off-by: Chunming Zhou <david1.zhou@xxxxxxx>
> > Reviewed-by: Christian König <christian.koenig@xxxxxxx>
>
> I think this closes a big gap between ttm and the bkl/struct_mutex
> drivers - it's much easier to guarantee you can evict everything if
> there's only a single lock :-)
>
> Would be absolutely awesome if we could extract this as some kind of
> building block, like we've done with lots of other ttm concepts already
> (reservation_obj, fences, ...).

Just an aside really. Ofc this is meant as a comment on the entire patch
series; without all the other patches that make sure BOs always stay on a
relevant LRU, there are still gaps in the guaranteed-forward-progress
eviction algorithm.
-Daniel

> -Daniel
>
> > ---
> >  drivers/gpu/drm/ttm/ttm_bo.c | 113 +++++++++++++++++++++++++++++------
> >  1 file changed, 96 insertions(+), 17 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> > index 2845fceb2fbd..e634d3a36923 100644
> > --- a/drivers/gpu/drm/ttm/ttm_bo.c
> > +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> > @@ -766,11 +766,13 @@ EXPORT_SYMBOL(ttm_bo_eviction_valuable);
> >   * b. Otherwise, trylock it.
> >   */
> >  static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
> > -			struct ttm_operation_ctx *ctx, bool *locked)
> > +			struct ttm_operation_ctx *ctx, bool *locked, bool *busy)
> >  {
> >  	bool ret = false;
> >
> >  	*locked = false;
> > +	if (busy)
> > +		*busy = false;
> >  	if (bo->resv == ctx->resv) {
> >  		reservation_object_assert_held(bo->resv);
> >  		if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT
> > @@ -779,35 +781,46 @@ static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
> >  	} else {
> >  		*locked = reservation_object_trylock(bo->resv);
> >  		ret = *locked;
> > +		if (!ret && busy)
> > +			*busy = true;
> >  	}
> >
> >  	return ret;
> >  }
> >
> > -static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
> > -			       uint32_t mem_type,
> > -			       const struct ttm_place *place,
> > -			       struct ttm_operation_ctx *ctx)
> > +static struct ttm_buffer_object*
> > +ttm_mem_find_evitable_bo(struct ttm_bo_device *bdev,
> > +			 struct ttm_mem_type_manager *man,
> > +			 const struct ttm_place *place,
> > +			 struct ttm_operation_ctx *ctx,
> > +			 struct ttm_buffer_object **first_bo,
> > +			 bool *locked)
> >  {
> > -	struct ttm_bo_global *glob = bdev->glob;
> > -	struct ttm_mem_type_manager *man = &bdev->man[mem_type];
> >  	struct ttm_buffer_object *bo = NULL;
> > -	bool locked = false;
> > -	unsigned i;
> > -	int ret;
> > +	int i;
> >
> > -	spin_lock(&glob->lru_lock);
> > +	if (first_bo)
> > +		*first_bo = NULL;
> >  	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> >  		list_for_each_entry(bo, &man->lru[i], lru) {
> > -			if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked))
> > +			bool busy = false;
> > +
> > +			if (!ttm_bo_evict_swapout_allowable(bo, ctx, locked,
> > +							    &busy)) {
> > +				if (first_bo && !(*first_bo) && busy) {
> > +					ttm_bo_get(bo);
> > +					*first_bo = bo;
> > +				}
> >  				continue;
> > +			}
> >
> >  			if (place && !bdev->driver->eviction_valuable(bo,
> >  								      place)) {
> > -				if (locked)
> > +				if (*locked)
> >  					reservation_object_unlock(bo->resv);
> >  				continue;
> >  			}
> > +
> >  			break;
> >  		}
> >
> > @@ -818,9 +831,69 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
> >  		bo = NULL;
> >  	}
> >
> > +	return bo;
> > +}
> > +
> > +static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
> > +			       uint32_t mem_type,
> > +			       const struct ttm_place *place,
> > +			       struct ttm_operation_ctx *ctx,
> > +			       struct reservation_object *request_resv)
> > +{
> > +	struct ttm_bo_global *glob = bdev->glob;
> > +	struct ttm_mem_type_manager *man = &bdev->man[mem_type];
> > +	struct ttm_buffer_object *bo = NULL, *first_bo = NULL;
> > +	bool locked = false;
> > +	int ret;
> > +
> > +	spin_lock(&glob->lru_lock);
> > +	bo = ttm_mem_find_evitable_bo(bdev, man, place, ctx, &first_bo,
> > +				      &locked);
> >  	if (!bo) {
> > +		struct ww_acquire_ctx *acquire_ctx = request_resv->lock.ctx;
> > +		struct ttm_operation_ctx busy_ctx;
> > +
> >  		spin_unlock(&glob->lru_lock);
> > -		return -EBUSY;
> > +		/* check if other user occupy memory too long time */
> > +		if (!first_bo || !request_resv || !request_resv->lock.ctx) {
> > +			if (first_bo)
> > +				ttm_bo_put(first_bo);
> > +			return -EBUSY;
> > +		}
> > +		if (first_bo->resv == request_resv) {
> > +			ttm_bo_put(first_bo);
> > +			return -EBUSY;
> > +		}
> > +		if (ctx->interruptible)
> > +			ret = ww_mutex_lock_interruptible(&first_bo->resv->lock,
> > +							  acquire_ctx);
> > +		else
> > +			ret = ww_mutex_lock(&first_bo->resv->lock,
> > +					    acquire_ctx);
> > +		if (ret) {
> > +			ttm_bo_put(first_bo);
> > +			return ret;
> > +		}
> > +		spin_lock(&glob->lru_lock);
> > +		/* previous busy resv lock is held by above, idle now,
> > +		 * so let them evictable.
> > +		 */
> > +		busy_ctx.interruptible = ctx->interruptible;
> > +		busy_ctx.no_wait_gpu = ctx->no_wait_gpu;
> > +		busy_ctx.resv = first_bo->resv;
> > +		busy_ctx.flags = TTM_OPT_FLAG_ALLOW_RES_EVICT;
> > +
> > +		bo = ttm_mem_find_evitable_bo(bdev, man, place, &busy_ctx, NULL,
> > +					      &locked);
> > +		if (bo && (bo->resv == first_bo->resv))
> > +			locked = true;
> > +		else if (bo)
> > +			ww_mutex_unlock(&first_bo->resv->lock);
> > +		if (!bo) {
> > +			spin_unlock(&glob->lru_lock);
> > +			ttm_bo_put(first_bo);
> > +			return -EBUSY;
> > +		}
> >  	}
> >
> >  	kref_get(&bo->list_kref);
> > @@ -829,11 +902,15 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
> >  		ret = ttm_bo_cleanup_refs(bo, ctx->interruptible,
> >  					  ctx->no_wait_gpu, locked);
> >  		kref_put(&bo->list_kref, ttm_bo_release_list);
> > +		if (first_bo)
> > +			ttm_bo_put(first_bo);
> >  		return ret;
> >  	}
> >
> >  	ttm_bo_del_from_lru(bo);
> >  	spin_unlock(&glob->lru_lock);
> > +	if (first_bo)
> > +		ttm_bo_put(first_bo);
> >
> >  	ret = ttm_bo_evict(bo, ctx);
> >  	if (locked) {
> > @@ -907,7 +984,7 @@ static int ttm_bo_mem_force_space(struct ttm_buffer_object *bo,
> >  			return ret;
> >  		if (mem->mm_node)
> >  			break;
> > -		ret = ttm_mem_evict_first(bdev, mem_type, place, ctx);
> > +		ret = ttm_mem_evict_first(bdev, mem_type, place, ctx, bo->resv);
> >  		if (unlikely(ret != 0))
> >  			return ret;
> >  	} while (1);
> > @@ -1401,7 +1478,8 @@ static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev,
> >  	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> >  		while (!list_empty(&man->lru[i])) {
> >  			spin_unlock(&glob->lru_lock);
> > -			ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx);
> > +			ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx,
> > +						  NULL);
> >  			if (ret)
> >  				return ret;
> >  			spin_lock(&glob->lru_lock);
> > @@ -1772,7 +1850,8 @@ int ttm_bo_swapout(struct ttm_bo_global *glob, struct ttm_operation_ctx *ctx)
> >  	spin_lock(&glob->lru_lock);
> >  	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> >  		list_for_each_entry(bo, &glob->swap_lru[i], swap) {
> > -			if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked)) {
> > +			if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
> > +							   NULL)) {
> >  				ret = 0;
> >  				break;
> >  			}
> > --
> > 2.17.1
> >
> > _______________________________________________
> > dri-devel mailing list
> > dri-devel@xxxxxxxxxxxxxxxxxxxxx
> > https://lists.freedesktop.org/mailman/listinfo/dri-devel
>
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch

--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
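
[Editor's note] For readers wondering how the ww_mutex ticket that the new
ttm_mem_evict_first() inspects via request_resv->lock.ctx ends up on the
reservation object in the first place (step 1 of the commit message), the
sketch below shows a possible caller-side flow. It is not part of the patch:
the function name my_driver_validate_bo() is made up, real drivers typically
reserve through helpers such as ttm_eu_reserve_buffers() rather than raw
ww_mutex calls, and -EDEADLK backoff handling is omitted; only the ww_mutex
and TTM entry points used are existing kernel APIs of that era.

/*
 * Illustrative sketch only (not from the patch, not compilable standalone):
 * a driver validation path that hands a ww_acquire_ctx "ticket" to TTM, so
 * that bo->resv->lock.ctx is non-NULL when ttm_mem_evict_first() hits the
 * -EBUSY case handled above.
 */
#include <linux/ww_mutex.h>
#include <linux/reservation.h>
#include <drm/ttm/ttm_bo_api.h>
#include <drm/ttm/ttm_placement.h>

static int my_driver_validate_bo(struct ttm_buffer_object *bo,
				 struct ttm_placement *placement)
{
	struct ttm_operation_ctx ctx = {
		.interruptible = true,
		.no_wait_gpu = false,
	};
	struct ww_acquire_ctx ticket;
	int ret;

	/* Step 1: reserve the BO with a ww_mutex ticket. */
	ww_acquire_init(&ticket, &reservation_ww_class);
	ret = ww_mutex_lock_interruptible(&bo->resv->lock, &ticket);
	if (ret)	/* -EDEADLK backoff omitted for brevity */
		goto out_fini;

	/*
	 * While validation runs, bo->resv->lock.ctx == &ticket.  If TTM
	 * cannot trylock any LRU entry, the new code can now take the busy
	 * BO's reservation with this same ticket instead of failing with
	 * -EBUSY right away.
	 */
	ret = ttm_bo_validate(bo, placement, &ctx);

	ww_mutex_unlock(&bo->resv->lock);
out_fini:
	ww_acquire_fini(&ticket);
	return ret;
}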