Since the gt migration code was using only a single fence for dependencies, these were collected in a dma_fence_array. However, it turns out that it's illegal to use some dma_fences in a dma_fence_array, in particular other dma_fence_arrays and dma_fence_chains, and this causes trouble for us moving forward. Have the gt migration code instead take a const struct i915_deps for dependencies. This means we can skip the dma_fence_array creation and instead pass the struct i915_deps instead to circumvent the problem. v2: - Make the prev_deps() function static. (kernel test robot <lkp@xxxxxxxxx>) - Update the struct i915_deps kerneldoc. v4: - Rebase. Fixes: 5652df829b3c ("drm/i915/ttm: Update i915_gem_obj_copy_ttm() to be asynchronous") Signed-off-by: Thomas Hellström <thomas.hellstrom@xxxxxxxxxxxxxxx> Reviewed-by: Matthew Auld <matthew.auld@xxxxxxxxx> --- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 129 +++++-------------- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h | 17 +++ drivers/gpu/drm/i915/gt/intel_migrate.c | 24 ++-- drivers/gpu/drm/i915/gt/intel_migrate.h | 9 +- drivers/gpu/drm/i915/i915_request.c | 22 ++++ drivers/gpu/drm/i915/i915_request.h | 2 + 6 files changed, 91 insertions(+), 112 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 8ad09fcf3698..62a6fd2d127d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -3,8 +3,6 @@ * Copyright © 2021 Intel Corporation */ -#include <linux/dma-fence-array.h> - #include <drm/ttm/ttm_bo_driver.h> #include "i915_drv.h" @@ -44,17 +42,12 @@ void i915_ttm_migrate_set_failure_modes(bool gpu_migration, #endif /** - * DOC: Set of utilities to dynamically collect dependencies and - * eventually coalesce them into a single fence which is fed into - * the GT migration code, since it only accepts a single dependency - * fence. - * The single fence returned from these utilities, in the case of - * dependencies from multiple fence contexts, a struct dma_fence_array, - * since the i915 request code can break that up and await the individual - * fences. + * DOC: Set of utilities to dynamically collect dependencies into a + * structure which is fed into the GT migration code. * * Once we can do async unbinding, this is also needed to coalesce - * the migration fence with the unbind fences. + * the migration fence with the unbind fences if these are coalesced + * post-migration. * * While collecting the individual dependencies, we store the refcounted * struct dma_fence pointers in a realloc-managed pointer array, since @@ -65,32 +58,13 @@ void i915_ttm_migrate_set_failure_modes(bool gpu_migration, * A struct i915_deps need to be initialized using i915_deps_init(). * If i915_deps_add_dependency() or i915_deps_add_resv() return an * error code they will internally call i915_deps_fini(), which frees - * all internal references and allocations. After a call to - * i915_deps_to_fence(), or i915_deps_sync(), the struct should similarly - * be viewed as uninitialized. + * all internal references and allocations. * * We might want to break this out into a separate file as a utility. */ #define I915_DEPS_MIN_ALLOC_CHUNK 8U -/** - * struct i915_deps - Collect dependencies into a single dma-fence - * @single: Storage for pointer if the collection is a single fence. - * @fence: Allocated array of fence pointers if more than a single fence; - * otherwise points to the address of @single. - * @num_deps: Current number of dependency fences. - * @fences_size: Size of the @fences array in number of pointers. - * @gfp: Allocation mode. - */ -struct i915_deps { - struct dma_fence *single; - struct dma_fence **fences; - unsigned int num_deps; - unsigned int fences_size; - gfp_t gfp; -}; - static void i915_deps_reset_fences(struct i915_deps *deps) { if (deps->fences != &deps->single) @@ -163,7 +137,7 @@ static int i915_deps_grow(struct i915_deps *deps, struct dma_fence *fence, return ret; } -static int i915_deps_sync(struct i915_deps *deps, +static int i915_deps_sync(const struct i915_deps *deps, const struct ttm_operation_ctx *ctx) { struct dma_fence **fences = deps->fences; @@ -183,7 +157,6 @@ static int i915_deps_sync(struct i915_deps *deps, break; } - i915_deps_fini(deps); return ret; } @@ -221,34 +194,6 @@ static int i915_deps_add_dependency(struct i915_deps *deps, return i915_deps_grow(deps, fence, ctx); } -static struct dma_fence *i915_deps_to_fence(struct i915_deps *deps, - const struct ttm_operation_ctx *ctx) -{ - struct dma_fence_array *array; - - if (deps->num_deps == 0) - return NULL; - - if (deps->num_deps == 1) { - deps->num_deps = 0; - return deps->fences[0]; - } - - /* - * TODO: Alter the allocation mode here to not try too hard to - * make things async. - */ - array = dma_fence_array_create(deps->num_deps, deps->fences, 0, 0, - false); - if (!array) - return ERR_PTR(i915_deps_sync(deps, ctx)); - - deps->fences = NULL; - i915_deps_reset_fences(deps); - - return &array->base; -} - static int i915_deps_add_resv(struct i915_deps *deps, struct dma_resv *resv, bool all, const bool no_excl, const struct ttm_operation_ctx *ctx) @@ -387,7 +332,7 @@ static struct dma_fence *i915_ttm_accel_move(struct ttm_buffer_object *bo, struct ttm_resource *dst_mem, struct ttm_tt *dst_ttm, struct sg_table *dst_st, - struct dma_fence *dep) + const struct i915_deps *deps) { struct drm_i915_private *i915 = container_of(bo->bdev, typeof(*i915), bdev); @@ -411,7 +356,7 @@ static struct dma_fence *i915_ttm_accel_move(struct ttm_buffer_object *bo, return ERR_PTR(-EINVAL); intel_engine_pm_get(to_gt(i915)->migrate.context->engine); - ret = intel_context_migrate_clear(to_gt(i915)->migrate.context, dep, + ret = intel_context_migrate_clear(to_gt(i915)->migrate.context, deps, dst_st->sgl, dst_level, i915_ttm_gtt_binds_lmem(dst_mem), 0, &rq); @@ -425,7 +370,7 @@ static struct dma_fence *i915_ttm_accel_move(struct ttm_buffer_object *bo, src_level = i915_ttm_cache_level(i915, bo->resource, src_ttm); intel_engine_pm_get(to_gt(i915)->migrate.context->engine); ret = intel_context_migrate_copy(to_gt(i915)->migrate.context, - dep, src_rsgt->table.sgl, + deps, src_rsgt->table.sgl, src_level, i915_ttm_gtt_binds_lmem(bo->resource), dst_st->sgl, dst_level, @@ -610,10 +555,11 @@ i915_ttm_memcpy_work_arm(struct i915_ttm_memcpy_work *work, } static struct dma_fence * -__i915_ttm_move(struct ttm_buffer_object *bo, bool clear, +__i915_ttm_move(struct ttm_buffer_object *bo, + const struct ttm_operation_ctx *ctx, bool clear, struct ttm_resource *dst_mem, struct ttm_tt *dst_ttm, struct i915_refct_sgt *dst_rsgt, bool allow_accel, - struct dma_fence *move_dep) + const struct i915_deps *move_deps) { struct i915_ttm_memcpy_work *copy_work = NULL; struct i915_ttm_memcpy_arg _arg, *arg = &_arg; @@ -621,7 +567,7 @@ __i915_ttm_move(struct ttm_buffer_object *bo, bool clear, if (allow_accel) { fence = i915_ttm_accel_move(bo, clear, dst_mem, dst_ttm, - &dst_rsgt->table, move_dep); + &dst_rsgt->table, move_deps); /* * We only need to intercept the error when moving to lmem. @@ -655,8 +601,8 @@ __i915_ttm_move(struct ttm_buffer_object *bo, bool clear, if (!IS_ERR(fence)) goto out; - } else if (move_dep) { - int err = dma_fence_wait(move_dep, true); + } else if (move_deps) { + int err = i915_deps_sync(move_deps, ctx); if (err) return ERR_PTR(err); @@ -680,29 +626,21 @@ __i915_ttm_move(struct ttm_buffer_object *bo, bool clear, return fence; } -static struct dma_fence *prev_fence(struct ttm_buffer_object *bo, - struct ttm_operation_ctx *ctx) +static int +prev_deps(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, + struct i915_deps *deps) { - struct i915_deps deps; int ret; - /* - * Instead of trying hard with GFP_KERNEL to allocate memory, - * the dependency collection will just sync if it doesn't - * succeed. - */ - i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); - ret = i915_deps_add_dependency(&deps, bo->moving, ctx); + ret = i915_deps_add_dependency(deps, bo->moving, ctx); if (!ret) /* * TODO: Only await excl fence here, and shared fences before * signaling the migration fence. */ - ret = i915_deps_add_resv(&deps, bo->base.resv, true, false, ctx); - if (ret) - return ERR_PTR(ret); + ret = i915_deps_add_resv(deps, bo->base.resv, true, false, ctx); - return i915_deps_to_fence(&deps, ctx); + return ret; } /** @@ -756,16 +694,18 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, clear = !i915_ttm_cpu_maps_iomem(bo->resource) && (!ttm || !ttm_tt_is_populated(ttm)); if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC))) { - struct dma_fence *dep = prev_fence(bo, ctx); + struct i915_deps deps; - if (IS_ERR(dep)) { + i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); + ret = prev_deps(bo, ctx, &deps); + if (ret) { i915_refct_sgt_put(dst_rsgt); - return PTR_ERR(dep); + return ret; } - migration_fence = __i915_ttm_move(bo, clear, dst_mem, bo->ttm, - dst_rsgt, true, dep); - dma_fence_put(dep); + migration_fence = __i915_ttm_move(bo, ctx, clear, dst_mem, bo->ttm, + dst_rsgt, true, &deps); + i915_deps_fini(&deps); } /* We can possibly get an -ERESTARTSYS here */ @@ -826,7 +766,7 @@ int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst, .interruptible = intr, }; struct i915_refct_sgt *dst_rsgt; - struct dma_fence *copy_fence, *dep_fence; + struct dma_fence *copy_fence; struct i915_deps deps; int ret, shared_err; @@ -847,15 +787,12 @@ int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst, if (ret) return ret; - dep_fence = i915_deps_to_fence(&deps, &ctx); - if (IS_ERR(dep_fence)) - return PTR_ERR(dep_fence); - dst_rsgt = i915_ttm_resource_get_st(dst, dst_bo->resource); - copy_fence = __i915_ttm_move(src_bo, false, dst_bo->resource, + copy_fence = __i915_ttm_move(src_bo, &ctx, false, dst_bo->resource, dst_bo->ttm, dst_rsgt, allow_accel, - dep_fence); + &deps); + i915_deps_fini(&deps); i915_refct_sgt_put(dst_rsgt); if (IS_ERR_OR_NULL(copy_fence)) return PTR_ERR_OR_ZERO(copy_fence); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h index d2e7f149e05c..138b7647a558 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h @@ -18,6 +18,23 @@ struct ttm_tt; struct drm_i915_gem_object; struct i915_refct_sgt; +/** + * struct i915_deps - Collect dependencies into a single dma-fence + * @single: Storage for pointer if the collection is a single fence. + * @fences: Allocated array of fence pointers if more than a single fence; + * otherwise points to the address of @single. + * @num_deps: Current number of dependency fences. + * @fences_size: Size of the @fences array in number of pointers. + * @gfp: Allocation mode. + */ +struct i915_deps { + struct dma_fence *single; + struct dma_fence **fences; + unsigned int num_deps; + unsigned int fences_size; + gfp_t gfp; +}; + int i915_ttm_move_notify(struct ttm_buffer_object *bo); I915_SELFTEST_DECLARE(void i915_ttm_migrate_set_failure_modes(bool gpu_migration, diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c index 19a01878fee3..18b44af56969 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.c +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -404,7 +404,7 @@ static int emit_copy(struct i915_request *rq, int size) int intel_context_migrate_copy(struct intel_context *ce, - struct dma_fence *await, + const struct i915_deps *deps, struct scatterlist *src, enum i915_cache_level src_cache_level, bool src_is_lmem, @@ -431,8 +431,8 @@ intel_context_migrate_copy(struct intel_context *ce, goto out_ce; } - if (await) { - err = i915_request_await_dma_fence(rq, await); + if (deps) { + err = i915_request_await_deps(rq, deps); if (err) goto out_rq; @@ -442,7 +442,7 @@ intel_context_migrate_copy(struct intel_context *ce, goto out_rq; } - await = NULL; + deps = NULL; } /* The PTE updates + copy must not be interrupted. */ @@ -525,7 +525,7 @@ static int emit_clear(struct i915_request *rq, int size, u32 value) int intel_context_migrate_clear(struct intel_context *ce, - struct dma_fence *await, + const struct i915_deps *deps, struct scatterlist *sg, enum i915_cache_level cache_level, bool is_lmem, @@ -550,8 +550,8 @@ intel_context_migrate_clear(struct intel_context *ce, goto out_ce; } - if (await) { - err = i915_request_await_dma_fence(rq, await); + if (deps) { + err = i915_request_await_deps(rq, deps); if (err) goto out_rq; @@ -561,7 +561,7 @@ intel_context_migrate_clear(struct intel_context *ce, goto out_rq; } - await = NULL; + deps = NULL; } /* The PTE updates + clear must not be interrupted. */ @@ -599,7 +599,7 @@ intel_context_migrate_clear(struct intel_context *ce, int intel_migrate_copy(struct intel_migrate *m, struct i915_gem_ww_ctx *ww, - struct dma_fence *await, + const struct i915_deps *deps, struct scatterlist *src, enum i915_cache_level src_cache_level, bool src_is_lmem, @@ -624,7 +624,7 @@ int intel_migrate_copy(struct intel_migrate *m, if (err) goto out; - err = intel_context_migrate_copy(ce, await, + err = intel_context_migrate_copy(ce, deps, src, src_cache_level, src_is_lmem, dst, dst_cache_level, dst_is_lmem, out); @@ -638,7 +638,7 @@ int intel_migrate_copy(struct intel_migrate *m, int intel_migrate_clear(struct intel_migrate *m, struct i915_gem_ww_ctx *ww, - struct dma_fence *await, + const struct i915_deps *deps, struct scatterlist *sg, enum i915_cache_level cache_level, bool is_lmem, @@ -661,7 +661,7 @@ intel_migrate_clear(struct intel_migrate *m, if (err) goto out; - err = intel_context_migrate_clear(ce, await, sg, cache_level, + err = intel_context_migrate_clear(ce, deps, sg, cache_level, is_lmem, value, out); intel_context_unpin(ce); diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.h b/drivers/gpu/drm/i915/gt/intel_migrate.h index 4e18e755a00b..ccc677ec4aa3 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.h +++ b/drivers/gpu/drm/i915/gt/intel_migrate.h @@ -11,6 +11,7 @@ #include "intel_migrate_types.h" struct dma_fence; +struct i915_deps; struct i915_request; struct i915_gem_ww_ctx; struct intel_gt; @@ -23,7 +24,7 @@ struct intel_context *intel_migrate_create_context(struct intel_migrate *m); int intel_migrate_copy(struct intel_migrate *m, struct i915_gem_ww_ctx *ww, - struct dma_fence *await, + const struct i915_deps *deps, struct scatterlist *src, enum i915_cache_level src_cache_level, bool src_is_lmem, @@ -33,7 +34,7 @@ int intel_migrate_copy(struct intel_migrate *m, struct i915_request **out); int intel_context_migrate_copy(struct intel_context *ce, - struct dma_fence *await, + const struct i915_deps *deps, struct scatterlist *src, enum i915_cache_level src_cache_level, bool src_is_lmem, @@ -45,7 +46,7 @@ int intel_context_migrate_copy(struct intel_context *ce, int intel_migrate_clear(struct intel_migrate *m, struct i915_gem_ww_ctx *ww, - struct dma_fence *await, + const struct i915_deps *deps, struct scatterlist *sg, enum i915_cache_level cache_level, bool is_lmem, @@ -53,7 +54,7 @@ intel_migrate_clear(struct intel_migrate *m, struct i915_request **out); int intel_context_migrate_clear(struct intel_context *ce, - struct dma_fence *await, + const struct i915_deps *deps, struct scatterlist *sg, enum i915_cache_level cache_level, bool is_lmem, diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 40cf8ec0b9fc..7d804df27546 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -32,6 +32,7 @@ #include <linux/sched/mm.h> #include "gem/i915_gem_context.h" +#include "gem/i915_gem_ttm_move.h" #include "gt/intel_breadcrumbs.h" #include "gt/intel_context.h" #include "gt/intel_engine.h" @@ -1543,6 +1544,27 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) return 0; } +/** + * i915_request_await_deps - set this request to (async) wait upon a struct + * i915_deps dma_fence collection + * @rq: request we are wishing to use + * @deps: The struct i915_deps containing the dependencies. + * + * Returns 0 if successful, negative error code on error. + */ +int i915_request_await_deps(struct i915_request *rq, const struct i915_deps *deps) +{ + int i, err; + + for (i = 0; i < deps->num_deps; ++i) { + err = i915_request_await_dma_fence(rq, deps->fences[i]); + if (err) + return err; + } + + return 0; +} + /** * i915_request_await_object - set this request to (async) wait upon a bo * @to: request we are wishing to use diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index ce7714210697..170ee78c2858 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -47,6 +47,7 @@ struct drm_file; struct drm_i915_gem_object; struct drm_printer; +struct i915_deps; struct i915_request; #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) @@ -411,6 +412,7 @@ int i915_request_await_object(struct i915_request *to, bool write); int i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence); +int i915_request_await_deps(struct i915_request *rq, const struct i915_deps *deps); int i915_request_await_execution(struct i915_request *rq, struct dma_fence *fence); -- 2.31.1