On Wed, 30 Jun 2021 at 16:27, Thomas Hellström <thomas.hellstrom@xxxxxxxxxxxxxxx> wrote: > > On Wed, 2021-06-30 at 15:19 +0100, Matthew Auld wrote: > > On Thu, 24 Jun 2021 at 20:31, Thomas Hellström > > <thomas.hellstrom@xxxxxxxxxxxxxxx> wrote: > > > > > > In order to make the code a bit more readable and to facilitate > > > async memcpy moves, reorganize the move code a little. Determine > > > at an early stage whether to copy or to clear. > > > > > > Signed-off-by: Thomas Hellström <thomas.hellstrom@xxxxxxxxxxxxxxx> > > > --- > > > drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 70 ++++++++++++++------- > > > ---- > > > 1 file changed, 40 insertions(+), 30 deletions(-) > > > > > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c > > > b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c > > > index c39d982c4fa6..4e529adcdfc7 100644 > > > --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c > > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c > > > @@ -431,6 +431,7 @@ i915_ttm_resource_get_st(struct > > > drm_i915_gem_object *obj, > > > } > > > > > > static int i915_ttm_accel_move(struct ttm_buffer_object *bo, > > > + bool clear, > > > struct ttm_resource *dst_mem, > > > struct sg_table *dst_st) > > > { > > > @@ -449,13 +450,10 @@ static int i915_ttm_accel_move(struct > > > ttm_buffer_object *bo, > > > return -EINVAL; > > > > > > dst_level = i915_ttm_cache_level(i915, dst_mem, ttm); > > > - if (!ttm || !ttm_tt_is_populated(ttm)) { > > > + if (clear) { > > > if (bo->type == ttm_bo_type_kernel) > > > return -EINVAL; > > > > Was that meant to be: > > return 0: > > > > ? > > > > Also does that mean we are incorrectly falling back to memset, for > > non-userspace objects, instead of making it a noop? > > No, we're deliberately falling back to memset for non-userspace > objects, but the logic only memsets in the BO_ALLOC_CPU_CLEAR case if > everything is implemented correctly. > > > > > > > > > - if (ttm && !(ttm->page_flags & > > > TTM_PAGE_FLAG_ZERO_ALLOC)) > > > - return 0; > > > - > > > intel_engine_pm_get(i915->gt.migrate.context- > > > >engine); > > > ret = intel_context_migrate_clear(i915- > > > >gt.migrate.context, NULL, > > > dst_st->sgl, > > > dst_level, > > > @@ -489,27 +487,53 @@ static int i915_ttm_accel_move(struct > > > ttm_buffer_object *bo, > > > return ret; > > > } > > > > > > -static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, > > > - struct ttm_operation_ctx *ctx, > > > - struct ttm_resource *dst_mem, > > > - struct ttm_place *hop) > > > +static void __i915_ttm_move(struct ttm_buffer_object *bo, bool > > > clear, > > > + struct ttm_resource *dst_mem, > > > + struct sg_table *dst_st) > > > { > > > struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); > > > - struct ttm_resource_manager *dst_man = > > > - ttm_manager_type(bo->bdev, dst_mem->mem_type); > > > struct intel_memory_region *dst_reg, *src_reg; > > > union { > > > struct ttm_kmap_iter_tt tt; > > > struct ttm_kmap_iter_iomap io; > > > } _dst_iter, _src_iter; > > > struct ttm_kmap_iter *dst_iter, *src_iter; > > > - struct sg_table *dst_st; > > > int ret; > > > > > > dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type); > > > src_reg = i915_ttm_region(bo->bdev, bo->resource- > > > >mem_type); > > > GEM_BUG_ON(!dst_reg || !src_reg); > > > > > > + ret = i915_ttm_accel_move(bo, clear, dst_mem, dst_st); > > > + if (ret) { > > > > One future consideration is flat CCS where I don't think we can > > easily > > fall back to memcpy for userspace objects. Maybe we can make this > > fallback conditional on DG1 or !ALLOC_USER for now, or just add a > > TODO? > > Ugh. Is that true for both clearing and copying, or is it only copying? With clearing I think we are required to nuke the aux CCS state using some special blitter command. For copying/moving I think it's a similar story, where special care might be needed for the aux state, which likely requires the blitter. Although tbh I don't really remember all the details. > > Problem is if we hit an engine reset and fence error during initial > clearing / swapin, the plan moving forward is to intercept that and > resort to cpu clearing / copying for security reasons. In the worst > case we at least need to be able to clear. > > /Thomas > > > > > > > + dst_iter = !cpu_maps_iomem(dst_mem) ? > > > + ttm_kmap_iter_tt_init(&_dst_iter.tt, bo- > > > >ttm) : > > > + ttm_kmap_iter_iomap_init(&_dst_iter.io, > > > &dst_reg->iomap, > > > + dst_st, dst_reg- > > > >region.start); > > > + > > > + src_iter = !cpu_maps_iomem(bo->resource) ? > > > + ttm_kmap_iter_tt_init(&_src_iter.tt, bo- > > > >ttm) : > > > + ttm_kmap_iter_iomap_init(&_src_iter.io, > > > &src_reg->iomap, > > > + obj- > > > >ttm.cached_io_st, > > > + src_reg- > > > >region.start); > > > + > > > + ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, > > > src_iter); > > > + } > > > +} > > > + > > > +static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, > > > + struct ttm_operation_ctx *ctx, > > > + struct ttm_resource *dst_mem, > > > + struct ttm_place *hop) > > > +{ > > > + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); > > > + struct ttm_resource_manager *dst_man = > > > + ttm_manager_type(bo->bdev, dst_mem->mem_type); > > > + struct ttm_tt *ttm = bo->ttm; > > > + struct sg_table *dst_st; > > > + bool clear; > > > + int ret; > > > + > > > /* Sync for now. We could do the actual copy async. */ > > > ret = ttm_bo_wait_ctx(bo, ctx); > > > if (ret) > > > @@ -526,9 +550,8 @@ static int i915_ttm_move(struct > > > ttm_buffer_object *bo, bool evict, > > > } > > > > > > /* Populate ttm with pages if needed. Typically system > > > memory. */ > > > - if (bo->ttm && (dst_man->use_tt || > > > - (bo->ttm->page_flags & > > > TTM_PAGE_FLAG_SWAPPED))) { > > > - ret = ttm_tt_populate(bo->bdev, bo->ttm, ctx); > > > + if (ttm && (dst_man->use_tt || (ttm->page_flags & > > > TTM_PAGE_FLAG_SWAPPED))) { > > > + ret = ttm_tt_populate(bo->bdev, ttm, ctx); > > > if (ret) > > > return ret; > > > } > > > @@ -537,23 +560,10 @@ static int i915_ttm_move(struct > > > ttm_buffer_object *bo, bool evict, > > > if (IS_ERR(dst_st)) > > > return PTR_ERR(dst_st); > > > > > > - ret = i915_ttm_accel_move(bo, dst_mem, dst_st); > > > - if (ret) { > > > - /* If we start mapping GGTT, we can no longer use > > > man::use_tt here. */ > > > - dst_iter = !cpu_maps_iomem(dst_mem) ? > > > - ttm_kmap_iter_tt_init(&_dst_iter.tt, bo- > > > >ttm) : > > > - ttm_kmap_iter_iomap_init(&_dst_iter.io, > > > &dst_reg->iomap, > > > - dst_st, dst_reg- > > > >region.start); > > > - > > > - src_iter = !cpu_maps_iomem(bo->resource) ? > > > - ttm_kmap_iter_tt_init(&_src_iter.tt, bo- > > > >ttm) : > > > - ttm_kmap_iter_iomap_init(&_src_iter.io, > > > &src_reg->iomap, > > > - obj- > > > >ttm.cached_io_st, > > > - src_reg- > > > >region.start); > > > + clear = !cpu_maps_iomem(bo->resource) && (!ttm || > > > !ttm_tt_is_populated(ttm)); > > > + if (!(clear && ttm && !(ttm->page_flags & > > > TTM_PAGE_FLAG_ZERO_ALLOC))) > > > > Seems quite hard to read? > > > > Reviewed-by: Matthew Auld <matthew.auld@xxxxxxxxx> > > > > > > > + __i915_ttm_move(bo, clear, dst_mem, dst_st); > > > > > > - ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, > > > src_iter); > > > - } > > > - /* Below dst_mem becomes bo->resource. */ > > > ttm_bo_move_sync_cleanup(bo, dst_mem); > > > i915_ttm_adjust_domains_after_move(obj); > > > i915_ttm_free_cached_io_st(obj); > > > -- > > > 2.31.1 > > > > >