I've foudn one more problem with this.
With lockdep enabled I get a warning because
ttm_eu_reserve_buffers()
has called ww_acquire_done() on the ticket (which
essentially means we
are done, no more locking with that ticket).
The simplest solution is probably to just remove the call to
ww_acquire_done() from ttm_eu_reserve_buffers().
Christian.
Am 07.05.19 um 13:45 schrieb Chunming Zhou:
> heavy gpu job could occupy memory long time, which lead
other user fail to get memory.
>
> basically pick up Christian idea:
>
> 1. Reserve the BO in DC using a ww_mutex ticket
(trivial).
> 2. If we then run into this EBUSY condition in TTM
check if the BO we need memory for (or rather the ww_mutex
of its reservation object) has a ticket assigned.
> 3. If we have a ticket we grab a reference to the first
BO on the LRU, drop the LRU lock and try to grab the
reservation lock with the ticket.
> 4. If getting the reservation lock with the ticket
succeeded we check if the BO is still the first one on the
LRU in question (the BO could have moved).
> 5. If the BO is still the first one on the LRU in
question we try to evict it as we would evict any other BO.
> 6. If any of the "If's" above fail we just back off and
return -EBUSY.
>
> v2: fix some minor check
> v3: address Christian v2 comments.
> v4: fix some missing
> v5: handle first_bo unlock and bo_get/put
> v6: abstract unified iterate function, and handle all
possible usecase not only pinned bo.
> v7: pass request bo->resv to ttm_bo_evict_first
>
> Change-Id: I21423fb922f885465f13833c41df1e134364a8e7
> Signed-off-by: Chunming Zhou
<david1.zhou@xxxxxxx>
> ---
> drivers/gpu/drm/ttm/ttm_bo.c | 111
+++++++++++++++++++++++++++++------
> 1 file changed, 94 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c
b/drivers/gpu/drm/ttm/ttm_bo.c
> index 8502b3ed2d88..f5e6328e4a57 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -766,11 +766,13 @@
EXPORT_SYMBOL(ttm_bo_eviction_valuable);
> * b. Otherwise, trylock it.
> */
> static bool ttm_bo_evict_swapout_allowable(struct
ttm_buffer_object *bo,
> - struct ttm_operation_ctx *ctx,
bool *locked)
> + struct ttm_operation_ctx *ctx,
bool *locked, bool *busy)
> {
> bool ret = false;
>
> *locked = false;
> + if (busy)
> + *busy = false;
> if (bo->resv == ctx->resv) {
>
reservation_object_assert_held(bo->resv);
> if (ctx->flags &
TTM_OPT_FLAG_ALLOW_RES_EVICT
> @@ -779,35 +781,46 @@ static bool
ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
> } else {
> *locked =
reservation_object_trylock(bo->resv);
> ret = *locked;
> + if (!ret && busy)
> + *busy = true;
> }
>
> return ret;
> }
>
> -static int ttm_mem_evict_first(struct ttm_bo_device
*bdev,
> - uint32_t mem_type,
> - const struct ttm_place
*place,
> - struct ttm_operation_ctx
*ctx)
> +static struct ttm_buffer_object*
> +ttm_mem_find_evitable_bo(struct ttm_bo_device *bdev,
> + struct ttm_mem_type_manager
*man,
> + const struct ttm_place *place,
> + struct ttm_operation_ctx *ctx,
> + struct ttm_buffer_object
**first_bo,
> + bool *locked)
> {
> - struct ttm_bo_global *glob = bdev->glob;
> - struct ttm_mem_type_manager *man =
&bdev->man[mem_type];
> struct ttm_buffer_object *bo = NULL;
> - bool locked = false;
> - unsigned i;
> - int ret;
> + int i;
>
> - spin_lock(&glob->lru_lock);
> + if (first_bo)
> + *first_bo = NULL;
> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> list_for_each_entry(bo,
&man->lru[i], lru) {
> - if
(!ttm_bo_evict_swapout_allowable(bo, ctx, &locked))
> + bool busy = false;
> +
> + if
(!ttm_bo_evict_swapout_allowable(bo, ctx, locked,
>
+
&busy)) {
> + if (first_bo &&
!(*first_bo) && busy) {
> + ttm_bo_get(bo);
> + *first_bo = bo;
> + }
> continue;
> + }
>
> if (place &&
!bdev->driver->eviction_valuable(bo,
>
place)) {
> - if (locked)
> + if (*locked)
>
reservation_object_unlock(bo->resv);
> continue;
> }
> +
> break;
> }
>
> @@ -818,9 +831,67 @@ static int
ttm_mem_evict_first(struct ttm_bo_device *bdev,
> bo = NULL;
> }
>
> + return bo;
> +}
> +
> +static int ttm_mem_evict_first(struct ttm_bo_device
*bdev,
> + uint32_t mem_type,
> + const struct ttm_place
*place,
> + struct ttm_operation_ctx
*ctx,
> + struct reservation_object
*request_resv)
> +{
> + struct ttm_bo_global *glob = bdev->glob;
> + struct ttm_mem_type_manager *man =
&bdev->man[mem_type];
> + struct ttm_buffer_object *bo = NULL, *first_bo =
NULL;
> + bool locked = false;
> + int ret;
> +
> + spin_lock(&glob->lru_lock);
> + bo = ttm_mem_find_evitable_bo(bdev, man, place,
ctx, &first_bo,
> + &locked);
> if (!bo) {
> + struct ttm_operation_ctx busy_ctx;
> +
> spin_unlock(&glob->lru_lock);
> - return -EBUSY;
> + /* check if other user occupy memory too
long time */
> + if (!first_bo || !request_resv ||
!request_resv->lock.ctx) {
> + if (first_bo)
> + ttm_bo_put(first_bo);
> + return -EBUSY;
> + }
> + if (first_bo->resv == request_resv) {
> + ttm_bo_put(first_bo);
> + return -EBUSY;
> + }
> + if (ctx->interruptible)
> + ret =
ww_mutex_lock_interruptible(&first_bo->resv->lock,
> +
request_resv->lock.ctx);
> + else
> + ret =
ww_mutex_lock(&first_bo->resv->lock,
request_resv->lock.ctx);
> + if (ret) {
> + ttm_bo_put(first_bo);
> + return ret;
> + }
> + spin_lock(&glob->lru_lock);
> + /* previous busy resv lock is held by
above, idle now,
> + * so let them evictable.
> + */
> + busy_ctx.interruptible =
ctx->interruptible;
> + busy_ctx.no_wait_gpu =
ctx->no_wait_gpu;
> + busy_ctx.resv =
first_bo->resv;
> + busy_ctx.flags =
TTM_OPT_FLAG_ALLOW_RES_EVICT;
> +
> + bo = ttm_mem_find_evitable_bo(bdev, man,
place, &busy_ctx, NULL,
> +
&locked);
> + if (bo && (bo->resv ==
first_bo->resv))
> + locked = true;
> + else if (bo)
> +
ww_mutex_unlock(&first_bo->resv->lock);
> + if (!bo) {
> +
spin_unlock(&glob->lru_lock);
> + ttm_bo_put(first_bo);
> + return -EBUSY;
> + }
> }
>
> kref_get(&bo->list_kref);
> @@ -829,11 +900,15 @@ static int
ttm_mem_evict_first(struct ttm_bo_device *bdev,
> ret = ttm_bo_cleanup_refs(bo,
ctx->interruptible,
>
ctx->no_wait_gpu, locked);
> kref_put(&bo->list_kref,
ttm_bo_release_list);
> + if (first_bo)
> + ttm_bo_put(first_bo);
> return ret;
> }
>
> ttm_bo_del_from_lru(bo);
> spin_unlock(&glob->lru_lock);
> + if (first_bo)
> + ttm_bo_put(first_bo);
>
> ret = ttm_bo_evict(bo, ctx);
> if (locked) {
> @@ -907,7 +982,7 @@ static int
ttm_bo_mem_force_space(struct ttm_buffer_object *bo,
> return ret;
> if (mem->mm_node)
> break;
> - ret = ttm_mem_evict_first(bdev, mem_type,
place, ctx);
> + ret = ttm_mem_evict_first(bdev, mem_type,
place, ctx, bo->resv);
> if (unlikely(ret != 0))
> return ret;
> } while (1);
> @@ -1413,7 +1488,8 @@ static int
ttm_bo_force_list_clean(struct ttm_bo_device *bdev,
> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> while (!list_empty(&man->lru[i]))
{
>
spin_unlock(&glob->lru_lock);
> - ret = ttm_mem_evict_first(bdev,
mem_type, NULL, &ctx);
> + ret = ttm_mem_evict_first(bdev,
mem_type, NULL, &ctx,
> + NULL);
> if (ret)
> return ret;
>
spin_lock(&glob->lru_lock);
> @@ -1784,7 +1860,8 @@ int ttm_bo_swapout(struct
ttm_bo_global *glob, struct ttm_operation_ctx *ctx)
> spin_lock(&glob->lru_lock);
> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> list_for_each_entry(bo,
&glob->swap_lru[i], swap) {
> - if
(ttm_bo_evict_swapout_allowable(bo, ctx, &locked)) {
> + if
(ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
>
+
NULL)) {
> ret = 0;
> break;
> }