On eviction, TTM requires that migration fences from the same region are
ordered using dma_fence_is_later(). For request-based fences we would
therefore need to use the same context for all migrations. But now that
we use a dma_fence_work for error recovery, and in addition might need to
coalesce the migration fence with async unbind fences, create a coalesce
fence for this purpose. Chain the coalesce fence onto the migration fence
and attach it to a region timeline.

Signed-off-by: Thomas Hellström <thomas.hellstrom@xxxxxxxxxxxxxxx>
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c    | 84 ++++++++++++++++++----
 drivers/gpu/drm/i915/intel_memory_region.c | 43 +++++++++++
 drivers/gpu/drm/i915/intel_memory_region.h |  7 ++
 3 files changed, 119 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 79d4d50aa4e5..625ce52e8662 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -672,9 +672,10 @@ static void __i915_ttm_move_fallback(struct ttm_buffer_object *bo, bool clear,
 	}
 }
 
-static int __i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
-			   struct ttm_resource *dst_mem, struct ttm_tt *dst_ttm,
-			   struct i915_refct_sgt *dst_rsgt, bool allow_accel)
+static struct dma_fence *
+__i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
+		struct ttm_resource *dst_mem, struct ttm_tt *dst_ttm,
+		struct i915_refct_sgt *dst_rsgt, bool allow_accel)
 {
 	struct i915_ttm_memcpy_work *copy_work;
 	struct dma_fence *fence;
@@ -689,7 +690,7 @@ static int __i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
 		/* Don't fail with -ENOMEM. Move sync instead. */
 		__i915_ttm_move_fallback(bo, clear, dst_mem, dst_ttm, dst_rsgt,
 					 allow_accel);
-		return 0;
+		return NULL;
 	}
 
 	dma_fence_work_init(&copy_work->base, &i915_ttm_memcpy_ops);
@@ -714,14 +715,45 @@ static int __i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
 	fence = dma_fence_get(&copy_work->base.dma);
 	dma_fence_work_commit_imm(&copy_work->base);
 
-	/*
-	 * We're synchronizing here for now. For async moves, return the
-	 * fence.
-	 */
-	dma_fence_wait(fence, false);
-	dma_fence_put(fence);
+	return fence;
+}
 
-	return ret;
+/**
+ * struct i915_coalesce_fence - A dma-fence used to coalesce multiple fences,
+ * similar to struct dma_fence_array, and at the same time being
+ * timeline-attached.
+ * @base: struct dma_fence_work base.
+ * @cb: Callback for timeline attachment.
+ */
+struct i915_coalesce_fence {
+	struct dma_fence_work base;
+	struct i915_sw_dma_fence_cb cb;
+};
+
+/* No .work or .release callback. Just coalescing. */
+static const struct dma_fence_work_ops i915_coalesce_fence_ops = {
+	.name = "Coalesce fence",
+};
+
+static struct dma_fence *
+i915_ttm_coalesce_fence(struct dma_fence *fence, struct intel_memory_region *mr)
+{
+	struct i915_coalesce_fence *coalesce =
+		kmalloc(sizeof(*coalesce), GFP_KERNEL);
+
+	if (!coalesce) {
+		dma_fence_wait(fence, false);
+		dma_fence_put(fence);
+		return NULL;
+	}
+
+	dma_fence_work_init(&coalesce->base, &i915_coalesce_fence_ops);
+	dma_fence_work_chain(&coalesce->base, fence);
+	dma_fence_work_timeline_attach(&mr->tl, &coalesce->base, &coalesce->cb);
+	dma_fence_get(&coalesce->base.dma);
+	dma_fence_work_commit_imm(&coalesce->base);
+	dma_fence_put(fence);
+	return &coalesce->base.dma;
 }
 
 static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
@@ -734,6 +766,7 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
 		ttm_manager_type(bo->bdev, dst_mem->mem_type);
 	struct ttm_tt *ttm = bo->ttm;
 	struct i915_refct_sgt *dst_rsgt;
+	struct dma_fence *fence = NULL;
 	bool clear;
 	int ret;
 
@@ -765,7 +798,23 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
 	clear = !cpu_maps_iomem(bo->resource) &&
 		(!ttm || !ttm_tt_is_populated(ttm));
 	if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC)))
-		__i915_ttm_move(bo, clear, dst_mem, bo->ttm, dst_rsgt, true);
+		fence = __i915_ttm_move(bo, clear, dst_mem, bo->ttm, dst_rsgt, true);
+	if (fence && evict) {
+		struct intel_memory_region *mr =
+			i915_ttm_region(bo->bdev, bo->resource->mem_type);
+
+		/*
+		 * Attach to the region timeline, which is required for
+		 * correct eviction fence ordering. Future async unbind
+		 * fences can also be attached here.
+		 */
+		fence = i915_ttm_coalesce_fence(fence, mr);
+	}
+
+	if (fence) {
+		dma_fence_wait(fence, false);
+		dma_fence_put(fence);
+	}
 
 	ttm_bo_move_sync_cleanup(bo, dst_mem);
 	i915_ttm_adjust_domains_after_move(obj);
@@ -1223,6 +1272,7 @@ int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
 		.interruptible = intr,
 	};
 	struct i915_refct_sgt *dst_rsgt;
+	struct dma_fence *fence;
 	int ret;
 
 	assert_object_held(dst);
@@ -1238,10 +1288,14 @@ int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
 		return ret;
 
 	dst_rsgt = i915_ttm_resource_get_st(dst, dst_bo->resource);
-	__i915_ttm_move(src_bo, false, dst_bo->resource, dst_bo->ttm,
-			dst_rsgt, allow_accel);
-
+	fence = __i915_ttm_move(src_bo, false, dst_bo->resource, dst_bo->ttm,
+				dst_rsgt, allow_accel);
 	i915_refct_sgt_put(dst_rsgt);
+	if (fence) {
+		dma_fence_wait(fence, false);
+		dma_fence_put(fence);
+	}
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c
index e7f7e6627750..aa1733e840f7 100644
--- a/drivers/gpu/drm/i915/intel_memory_region.c
+++ b/drivers/gpu/drm/i915/intel_memory_region.c
@@ -7,6 +7,9 @@
 #include "i915_drv.h"
 #include "i915_ttm_buddy_manager.h"
 
+static const struct dma_fence_work_timeline_ops tl_ops;
+static void intel_region_timeline_release_work(struct work_struct *work);
+
 static const struct {
 	u16 class;
 	u16 instance;
@@ -127,6 +130,10 @@ intel_memory_region_create(struct drm_i915_private *i915,
 	}
 
 	kref_init(&mem->kref);
+
+	INIT_WORK(&mem->tl_put_work, intel_region_timeline_release_work);
+	dma_fence_work_timeline_init(&mem->tl, NULL, &tl_ops);
+
 	return mem;
 
 err_free:
@@ -238,6 +245,42 @@ void intel_memory_regions_driver_release(struct drm_i915_private *i915)
 	}
 }
 
+static void intel_region_timeline_get(struct dma_fence_work_timeline *tl)
+{
+	struct intel_memory_region *mr = container_of(tl, typeof(*mr), tl);
+
+	intel_memory_region_get(mr);
+}
+
+static void intel_region_timeline_release_work(struct work_struct *work)
+{
+	struct intel_memory_region *mr =
+		container_of(work, typeof(*mr), tl_put_work);
+
+	__intel_memory_region_destroy(&mr->kref);
+}
+
+static void intel_region_timeline_release(struct kref *ref)
+{
+	struct intel_memory_region *mr = container_of(ref, typeof(*mr), kref);
+
+	/* May be called from hardirq context, so queue the final release. */
+	queue_work(system_unbound_wq, &mr->tl_put_work);
+}
+
+static void intel_region_timeline_put(struct dma_fence_work_timeline *tl)
+{
+	struct intel_memory_region *mr = container_of(tl, typeof(*mr), tl);
+
+	kref_put(&mr->kref, intel_region_timeline_release);
+}
+
+static const struct dma_fence_work_timeline_ops tl_ops = {
+	.name = "Region timeline",
+	.get = intel_region_timeline_get,
+	.put = intel_region_timeline_put,
+};
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/intel_memory_region.c"
 #include "selftests/mock_region.c"
diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h
index 3feae3353d33..928819e2edba 100644
--- a/drivers/gpu/drm/i915/intel_memory_region.h
+++ b/drivers/gpu/drm/i915/intel_memory_region.h
@@ -13,6 +13,8 @@
 #include <drm/drm_mm.h>
 #include <drm/i915_drm.h>
 
+#include "i915_sw_fence_work.h"
+
 struct drm_i915_private;
 struct drm_i915_gem_object;
 struct drm_printer;
@@ -94,6 +96,11 @@ struct intel_memory_region {
 	bool is_range_manager;
 
 	void *region_private;
+
+	/** Timeline for TTM eviction fences */
+	struct dma_fence_work_timeline tl;
+	/** Work struct for _region_put() from atomic / irq context */
+	struct work_struct tl_put_work;
 };
 
 struct intel_memory_region *
-- 
2.31.1
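
P.S. For reviewers unfamiliar with the ordering contract the commit message
refers to: dma_fence_is_later() only yields a meaningful ordering for fences
initialized on the same fence context, which is why all eviction fences from
one region must end up on one timeline. Below is a minimal, self-contained
sketch of that contract only. It is not part of the patch, and the stub_*
names are invented purely for illustration:

#include <linux/bug.h>
#include <linux/dma-fence.h>
#include <linux/spinlock.h>

static const char *stub_driver_name(struct dma_fence *f)
{
	return "stub";
}

static const char *stub_timeline_name(struct dma_fence *f)
{
	return "stub-timeline";
}

/* Minimal ops; the dma-fence core supplies defaults for the rest. */
static const struct dma_fence_ops stub_fence_ops = {
	.get_driver_name = stub_driver_name,
	.get_timeline_name = stub_timeline_name,
};

static DEFINE_SPINLOCK(stub_lock);

static void stub_ordering_demo(void)
{
	static struct dma_fence a, b;
	/* One shared context stands in for one region timeline. */
	u64 ctx = dma_fence_context_alloc(1);

	dma_fence_init(&a, &stub_fence_ops, &stub_lock, ctx, 1);
	dma_fence_init(&b, &stub_fence_ops, &stub_lock, ctx, 2);

	/* TTM relies on this relation holding for same-region migrations. */
	WARN_ON(!dma_fence_is_later(&b, &a));
}

Since the dma_fence_work used for error recovery allocates its own contexts,
migration fences no longer satisfy this on their own; the coalesce fence
re-publishes them on the per-region timeline to restore the invariant.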