Allow specifying a drm_exec object in TTM's operation context which is used to lock objects during eviction. This allows handling deadlocks much more gracefully and thereby avoids returning -ENOMEM on heavily contended domains. v2: rebased on top of Thomas' work TODO: This still doesn't correctly handle BOs which are about to be torn down. Signed-off-by: Christian König <christian.koenig@xxxxxxx> --- drivers/gpu/drm/ttm/ttm_bo_util.c | 45 +++++++++++++++++++++++++------ drivers/gpu/drm/ttm/ttm_bo_util.h | 2 ++ include/drm/ttm/ttm_bo.h | 3 +++ 3 files changed, 42 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 7a4bc7e9950b..850e329ab5a5 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -36,6 +36,7 @@ #include <drm/ttm/ttm_tt.h> #include <drm/drm_cache.h> +#include <drm/drm_exec.h> #include "ttm_bo_util.h" @@ -776,15 +777,22 @@ static bool ttm_lru_walk_trylock(struct ttm_lru_walk *walk, { struct ttm_operation_ctx *ctx = walk->ctx; + walk->needs_drop = false; walk->needs_unlock = false; - if (dma_resv_trylock(bo->base.resv)) { - walk->needs_unlock = true; + if (bo->base.resv == ctx->resv && ctx->allow_res_evict) { + dma_resv_assert_held(bo->base.resv); return true; } - if (bo->base.resv == ctx->resv && ctx->allow_res_evict) { - dma_resv_assert_held(bo->base.resv); + if (walk->ctx->exec) { + if (drm_exec_trylock_obj(walk->ctx->exec, &bo->base)) { + walk->needs_drop = true; + return true; + } + + } else if (dma_resv_trylock(bo->base.resv)) { + walk->needs_unlock = true; return true; } @@ -797,7 +805,9 @@ static int ttm_lru_walk_ticketlock(struct ttm_lru_walk *walk, struct dma_resv *resv = bo->base.resv; int ret; - if (walk->ctx->interruptible) + if (walk->ctx->exec) + ret = drm_exec_lock_obj(walk->ctx->exec, &bo->base); + else if (walk->ctx->interruptible) ret = dma_resv_lock_interruptible(resv, walk->ticket); else ret = dma_resv_lock(resv, walk->ticket); @@ 
-811,7 +821,8 @@ static int ttm_lru_walk_ticketlock(struct ttm_lru_walk *walk, * trylocking for this walk. */ walk->ticket = NULL; - } else if (ret == -EDEADLK) { + + } else if (!walk->ctx->exec && ret == -EDEADLK) { /* Caller needs to exit the ww transaction. */ ret = -ENOSPC; } @@ -822,7 +833,15 @@ static int ttm_lru_walk_ticketlock(struct ttm_lru_walk *walk, static void ttm_lru_walk_unlock(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo) { - if (walk->needs_unlock) + if (walk->needs_drop) + drm_exec_drop_trylocked_obj(walk->ctx->exec, &bo->base); + + if (!walk->needs_unlock) + return; + + if (walk->ctx->exec) + drm_exec_unlock_obj(walk->ctx->exec, &bo->base); + else dma_resv_unlock(bo->base.resv); } @@ -891,8 +910,18 @@ s64 ttm_lru_walk_for_evict(struct ttm_lru_walk *walk, struct ttm_device *bdev, spin_unlock(&bdev->lru_lock); lret = 0; - if (!bo_locked) + if (!bo_locked) { lret = ttm_lru_walk_ticketlock(walk, bo); + } else if (walk->ctx->exec && !bo->deleted) { + lret = drm_exec_keep_trylocked_obj(walk->ctx->exec, + &bo->base); + if (!lret) { + walk->needs_drop = false; + walk->needs_unlock = true; + } + } else { + lret = 0; + } /* * Note that in between the release of the lru lock and the diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.h b/drivers/gpu/drm/ttm/ttm_bo_util.h index c653e16ccb76..5e1bb156837f 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.h +++ b/drivers/gpu/drm/ttm/ttm_bo_util.h @@ -59,6 +59,8 @@ struct ttm_lru_walk { struct ww_acquire_ctx *ticket; /** @tryock_only: Only use trylock for locking. */ bool trylock_only; + /** @needs_drop: If the current BO needs a drm_exec trylock drop */ + bool needs_drop; /** @needs_unlock: If the current BO needs unlocking */ bool needs_unlock; }; diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h index 5f7c967222a2..5bee917e01e2 100644 --- a/include/drm/ttm/ttm_bo.h +++ b/include/drm/ttm/ttm_bo.h @@ -180,6 +180,8 @@ struct ttm_bo_kmap_obj { * faults. Should only be used by TTM internally. 
* @resv: Reservation object to allow reserved evictions with. * @bytes_moved: Statistics on how many bytes have been moved. + * @exec: optional drm_exec object to use for locking BOs and tracking which are + * locked. * * Context for TTM operations like changing buffer placement or general memory * allocation. @@ -192,6 +194,7 @@ struct ttm_operation_ctx { bool force_alloc; struct dma_resv *resv; uint64_t bytes_moved; + struct drm_exec *exec; }; /** -- 2.34.1