Am 07.05.19 um 13:08 schrieb zhoucm1: > > > On 2019年05月07日 18:53, Koenig, Christian wrote: >> Am 07.05.19 um 11:36 schrieb Chunming Zhou: >>> heavy gpu job could occupy memory long time, which lead other user >>> fail to get memory. >>> >>> basically pick up Christian idea: >>> >>> 1. Reserve the BO in DC using a ww_mutex ticket (trivial). >>> 2. If we then run into this EBUSY condition in TTM check if the BO >>> we need memory for (or rather the ww_mutex of its reservation >>> object) has a ticket assigned. >>> 3. If we have a ticket we grab a reference to the first BO on the >>> LRU, drop the LRU lock and try to grab the reservation lock with the >>> ticket. >>> 4. If getting the reservation lock with the ticket succeeded we >>> check if the BO is still the first one on the LRU in question (the >>> BO could have moved). >>> 5. If the BO is still the first one on the LRU in question we try to >>> evict it as we would evict any other BO. >>> 6. If any of the "If's" above fail we just back off and return -EBUSY. >>> >>> v2: fix some minor check >>> v3: address Christian v2 comments. >>> v4: fix some missing >>> v5: handle first_bo unlock and bo_get/put >>> v6: abstract unified iterate function, and handle all possible >>> usecase not only pinned bo. >>> >>> Change-Id: I21423fb922f885465f13833c41df1e134364a8e7 >>> Signed-off-by: Chunming Zhou <david1.zhou@xxxxxxx> >>> --- >>> drivers/gpu/drm/ttm/ttm_bo.c | 113 >>> ++++++++++++++++++++++++++++++----- >>> 1 file changed, 97 insertions(+), 16 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c >>> b/drivers/gpu/drm/ttm/ttm_bo.c >>> index 8502b3ed2d88..bbf1d14d00a7 100644 >>> --- a/drivers/gpu/drm/ttm/ttm_bo.c >>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c >>> @@ -766,11 +766,13 @@ EXPORT_SYMBOL(ttm_bo_eviction_valuable); >>> * b. Otherwise, trylock it. >>> */ >>> static bool ttm_bo_evict_swapout_allowable(struct >>> ttm_buffer_object *bo, >>> - struct ttm_operation_ctx *ctx, bool *locked) >>> + struct ttm_operation_ctx *ctx, bool *locked, bool *busy) >>> { >>> bool ret = false; >>> *locked = false; >>> + if (busy) >>> + *busy = false; >>> if (bo->resv == ctx->resv) { >>> reservation_object_assert_held(bo->resv); >>> if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT >>> @@ -779,35 +781,45 @@ static bool >>> ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo, >>> } else { >>> *locked = reservation_object_trylock(bo->resv); >>> ret = *locked; >>> + if (!ret && busy) >>> + *busy = true; >>> } >>> return ret; >>> } >>> -static int ttm_mem_evict_first(struct ttm_bo_device *bdev, >>> - uint32_t mem_type, >>> - const struct ttm_place *place, >>> - struct ttm_operation_ctx *ctx) >>> +static struct ttm_buffer_object* >>> +ttm_mem_find_evitable_bo(struct ttm_bo_device *bdev, >>> + struct ttm_mem_type_manager *man, >>> + const struct ttm_place *place, >>> + struct ttm_operation_ctx *ctx, >>> + struct ttm_buffer_object **first_bo, >>> + bool *locked) >>> { >>> - struct ttm_bo_global *glob = bdev->glob; >>> - struct ttm_mem_type_manager *man = &bdev->man[mem_type]; >>> struct ttm_buffer_object *bo = NULL; >>> - bool locked = false; >>> - unsigned i; >>> - int ret; >>> + int i; >>> - spin_lock(&glob->lru_lock); >>> + if (first_bo) >>> + *first_bo = NULL; >>> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { >>> list_for_each_entry(bo, &man->lru[i], lru) { >>> - if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked)) >>> + bool busy = false; >>> + if (!ttm_bo_evict_swapout_allowable(bo, ctx, locked, >>> + &busy)) { >> A newline between declaration and code please. >> >>> + if (first_bo && !(*first_bo) && busy) { >>> + ttm_bo_get(bo); >>> + *first_bo = bo; >>> + } >>> continue; >>> + } >>> if (place && !bdev->driver->eviction_valuable(bo, >>> place)) { >>> - if (locked) >>> + if (*locked) >>> reservation_object_unlock(bo->resv); >>> continue; >>> } >>> + >>> break; >>> } >>> @@ -818,9 +830,66 @@ static int ttm_mem_evict_first(struct >>> ttm_bo_device *bdev, >>> bo = NULL; >>> } >>> + return bo; >>> +} >>> + >>> +static int ttm_mem_evict_first(struct ttm_bo_device *bdev, >>> + uint32_t mem_type, >>> + const struct ttm_place *place, >>> + struct ttm_operation_ctx *ctx) >>> +{ >>> + struct ttm_bo_global *glob = bdev->glob; >>> + struct ttm_mem_type_manager *man = &bdev->man[mem_type]; >>> + struct ttm_buffer_object *bo = NULL, *first_bo = NULL; >>> + bool locked = false; >>> + int ret; >>> + >>> + spin_lock(&glob->lru_lock); >>> + bo = ttm_mem_find_evitable_bo(bdev, man, place, ctx, &first_bo, >>> + &locked); >>> if (!bo) { >>> + struct ttm_operation_ctx busy_ctx; >>> + >>> spin_unlock(&glob->lru_lock); >>> - return -EBUSY; >>> + /* check if other user occupy memory too long time */ >>> + if (!first_bo || !ctx || !ctx->resv || !ctx->resv->lock.ctx) { >>> + if (first_bo) >>> + ttm_bo_put(first_bo); >>> + return -EBUSY; >>> + } >>> + if (first_bo->resv == ctx->resv) { >>> + ttm_bo_put(first_bo); >>> + return -EBUSY; >>> + } >>> + if (ctx->interruptible) >>> + ret = ww_mutex_lock_interruptible(&first_bo->resv->lock, >>> + ctx->resv->lock.ctx); >>> + else >>> + ret = ww_mutex_lock(&first_bo->resv->lock, >>> ctx->resv->lock.ctx); >>> + if (ret) { >>> + ttm_bo_put(first_bo); >>> + return ret; >>> + } >>> + spin_lock(&glob->lru_lock); >>> + /* previous busy resv lock is held by above, idle now, >>> + * so let them evictable. >>> + */ >>> + busy_ctx.interruptible = ctx->interruptible; >>> + busy_ctx.no_wait_gpu = ctx->no_wait_gpu; >>> + busy_ctx.resv = first_bo->resv; >>> + busy_ctx.flags = TTM_OPT_FLAG_ALLOW_RES_EVICT; >>> + >>> + bo = ttm_mem_find_evitable_bo(bdev, man, place, &busy_ctx, >>> NULL, >>> + &locked); >>> + if (bo && (bo->resv == first_bo->resv)) >>> + locked = true; >>> + else if (bo) >>> + ww_mutex_unlock(&first_bo->resv->lock); >>> + if (!bo) { >>> + spin_unlock(&glob->lru_lock); >>> + ttm_bo_put(first_bo); >>> + return -EBUSY; >>> + } >>> } >>> kref_get(&bo->list_kref); >>> @@ -829,11 +898,15 @@ static int ttm_mem_evict_first(struct >>> ttm_bo_device *bdev, >>> ret = ttm_bo_cleanup_refs(bo, ctx->interruptible, >>> ctx->no_wait_gpu, locked); >>> kref_put(&bo->list_kref, ttm_bo_release_list); >>> + if (first_bo) >>> + ttm_bo_put(first_bo); >>> return ret; >>> } >>> ttm_bo_del_from_lru(bo); >>> spin_unlock(&glob->lru_lock); >>> + if (first_bo) >>> + ttm_bo_put(first_bo); >>> ret = ttm_bo_evict(bo, ctx); >>> if (locked) { >>> @@ -899,6 +972,13 @@ static int ttm_bo_mem_force_space(struct >>> ttm_buffer_object *bo, >>> { >>> struct ttm_bo_device *bdev = bo->bdev; >>> struct ttm_mem_type_manager *man = &bdev->man[mem_type]; >>> + struct ttm_operation_ctx native_ctx = { >>> + .interruptible = false, >>> + .no_wait_gpu = false, >>> + .resv = bo->resv, >>> + .flags = 0 >>> + }; >>> + struct ttm_operation_ctx *evict_ctx = ctx ? ctx : &native_ctx; >> I thought we made the ctx parameter mandatory, didn't we? Could be that >> I remember that incorrectly. > Prike said he see ctx->resv is null, in that case, code doesn't run > into busy path. > Oh, as you mentioned here, we need add .resv=bo->resv for every > ttm_operation_ctx. That's a huge change which will cross all vendor > drivers. > > Can we just force to evaluate evict_ctx->resv = bo->resv? That means > we just add one extra line: evict_ctx->resv = bo->resv. How about that? Well only if ctx->resv is NULL, otherwise we would overwrite some reservation context given by the driver. Probably better to give the acquir_ctx as separate parameter to ttm_mem_evict_first(). Christian. > > -David >> >> Christian. >> >>> int ret; >>> do { >>> @@ -907,7 +987,7 @@ static int ttm_bo_mem_force_space(struct >>> ttm_buffer_object *bo, >>> return ret; >>> if (mem->mm_node) >>> break; >>> - ret = ttm_mem_evict_first(bdev, mem_type, place, ctx); >>> + ret = ttm_mem_evict_first(bdev, mem_type, place, evict_ctx); >>> if (unlikely(ret != 0)) >>> return ret; >>> } while (1); >>> @@ -1784,7 +1864,8 @@ int ttm_bo_swapout(struct ttm_bo_global *glob, >>> struct ttm_operation_ctx *ctx) >>> spin_lock(&glob->lru_lock); >>> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { >>> list_for_each_entry(bo, &glob->swap_lru[i], swap) { >>> - if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked)) { >>> + if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked, >>> + NULL)) { >>> ret = 0; >>> break; >>> } > _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/dri-devel