Quoting Matthew Auld (2019-08-09 23:26:18)
> +struct i915_vma *intel_emit_vma_copy_blt(struct intel_engine_pool_node **p,
> +                                         struct intel_context *ce,
> +                                         struct i915_vma *src,
> +                                         struct i915_vma *dst)
> +{
> +        struct drm_i915_private *i915 = ce->vm->i915;
> +        const u32 block_size = S16_MAX * PAGE_SIZE;
> +        struct intel_engine_pool_node *pool;
> +        struct i915_vma *batch;
> +        u64 src_offset, dst_offset;
> +        u64 count;
> +        u64 rem;
> +        u32 size;
> +        u32 *cmd;
> +        int err;
> +
> +        GEM_BUG_ON(src->size != dst->size);
> +
> +        count = div_u64(dst->size, block_size);
> +        size = (1 + 11 * count) * sizeof(u32);
> +        size = round_up(size, PAGE_SIZE);
> +        pool = intel_engine_pool_get(&ce->engine->pool, size);
> +        if (IS_ERR(pool))
> +                return ERR_CAST(pool);
> +
> +        cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
> +        if (IS_ERR(cmd)) {
> +                err = PTR_ERR(cmd);
> +                goto out_put;
> +        }
> +
> +        rem = src->size;
> +        src_offset = src->node.start;
> +        dst_offset = dst->node.start;
> +
> +        do {
> +                u32 size = min_t(u64, rem, block_size);
> +
> +                GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);
> +
> +                if (INTEL_GEN(i915) >= 9) {
> +                        *cmd++ = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2);
> +                        *cmd++ = BLT_DEPTH_32 | PAGE_SIZE;
> +                        *cmd++ = 0;
> +                        *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
> +                        *cmd++ = lower_32_bits(dst_offset);
> +                        *cmd++ = upper_32_bits(dst_offset);
> +                        *cmd++ = 0;
> +                        *cmd++ = PAGE_SIZE;
> +                        *cmd++ = lower_32_bits(src_offset);
> +                        *cmd++ = upper_32_bits(src_offset);
> +                } else if (INTEL_GEN(i915) >= 8) {
> +                        *cmd++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (10 - 2);
> +                        *cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
> +                        *cmd++ = 0;
> +                        *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
> +                        *cmd++ = lower_32_bits(dst_offset);
> +                        *cmd++ = upper_32_bits(dst_offset);
> +                        *cmd++ = 0;
> +                        *cmd++ = PAGE_SIZE;
> +                        *cmd++ = lower_32_bits(src_offset);
> +                        *cmd++ = upper_32_bits(src_offset);
> +                } else {
> +                        *cmd++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
> +                        *cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
> +                        *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE;
> +                        *cmd++ = dst_offset;
> +                        *cmd++ = PAGE_SIZE;
> +                        *cmd++ = src_offset;
> +                }
> +
> +                /* Allow ourselves to be preempted in between blocks. */
> +                *cmd++ = MI_ARB_CHECK;
> +
> +                src_offset += size;
> +                dst_offset += size;
> +                rem -= size;
> +        } while (rem);
> +
> +        *cmd = MI_BATCH_BUFFER_END;
> +        intel_gt_chipset_flush(ce->vm->gt);
> +
> +        i915_gem_object_unpin_map(pool->obj);
> +
> +        batch = i915_vma_instance(pool->obj, ce->vm, NULL);
> +        if (IS_ERR(batch)) {
> +                err = PTR_ERR(batch);
> +                goto out_put;
> +        }
> +
> +        err = i915_vma_pin(batch, 0, 0, PIN_USER);
> +        if (unlikely(err))
> +                goto out_put;
> +
> +        *p = pool;
> +        return batch;
> +
> +out_put:
> +        intel_engine_pool_put(pool);
> +        return ERR_PTR(err);
> +}
> +
> +int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
> +                             struct drm_i915_gem_object *dst,
> +                             struct intel_context *ce)
> +{
> +        struct drm_gem_object *objs[] = { &src->base, &dst->base };
> +        struct i915_address_space *vm = ce->vm;
> +        struct intel_engine_pool_node *pool;
> +        struct ww_acquire_ctx acquire;
> +        struct i915_vma *vma_src, *vma_dst;
> +        struct i915_vma *batch;
> +        struct i915_request *rq;
> +        int err;
> +
> +        vma_src = i915_vma_instance(src, vm, NULL);
> +        if (IS_ERR(vma_src))
> +                return PTR_ERR(vma_src);
> +
> +        err = i915_vma_pin(vma_src, 0, 0, PIN_USER);
> +        if (unlikely(err))
> +                return err;
> +
> +        vma_dst = i915_vma_instance(dst, vm, NULL);
> +        if (IS_ERR(vma_dst))
> +                goto out_unpin_src;
> +
> +        err = i915_vma_pin(vma_dst, 0, 0, PIN_USER);
> +        if (unlikely(err))
> +                goto out_unpin_src;
> +
> +        intel_engine_pm_get(ce->engine);
> +        batch = intel_emit_vma_copy_blt(&pool, ce, vma_src, vma_dst);
> +        if (IS_ERR(batch)) {
> +                err = PTR_ERR(batch);
> +                goto out_unpin_dst;
> +        }
> +
> +        rq = intel_context_create_request(ce);
> +        if (IS_ERR(rq)) {
> +                err = PTR_ERR(rq);
> +                goto out_batch;
> +        }
> +
> +        i915_vma_lock(batch);
> +        err = i915_vma_move_to_active(batch, rq, 0);
> +        i915_vma_unlock(batch);
> +        if (unlikely(err))
> +                goto out_request;
> +
> +        err = intel_engine_pool_mark_active(pool, rq);
> +        if (unlikely(err))
> +                goto out_request;
> +
> +        err = drm_gem_lock_reservations(objs, ARRAY_SIZE(objs), &acquire);
> +        if (unlikely(err))
> +                goto out_request;
> +
> +        if (src->cache_dirty & ~src->cache_coherent)
> +                i915_gem_clflush_object(src, 0);
> +
> +        if (dst->cache_dirty & ~dst->cache_coherent)
> +                i915_gem_clflush_object(dst, 0);
> +
> +        err = i915_request_await_object(rq, src, false);
> +        if (unlikely(err))
> +                goto out_unlock;
> +
> +        err = i915_vma_move_to_active(vma_src, rq, 0);
> +        if (unlikely(err))
> +                goto out_unlock;
> +
> +        err = i915_request_await_object(rq, dst, true);
> +        if (unlikely(err))
> +                goto out_unlock;
> +
> +        err = i915_vma_move_to_active(vma_dst, rq, EXEC_OBJECT_WRITE);
> +        if (unlikely(err))
> +                goto out_unlock;

Strictly, wait on all objects, then setup all signals. Avoids any nasty
cycles in the dependency graphs. Such as if someone passed in src = dst.

Time for another selftest ;)

for (i = 0; i < ARRAY_SIZE(obj); i++) {
        clflush_object(obj[i]);
        await_object(rq, obj[i]);
}

for (i = 0; i < ARRAY_SIZE(obj); i++)
        move_to_active(obj[i]);
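Roughly something like the following (an untested sketch only; the
obj[]/vma[] arrays are invented here as stand-ins for { src, dst } and
{ vma_src, vma_dst }):

        /* Sketch: flush and await every object first... */
        struct drm_i915_gem_object *obj[] = { src, dst };
        struct i915_vma *vma[] = { vma_src, vma_dst };
        int i;

        for (i = 0; i < ARRAY_SIZE(obj); i++) {
                if (obj[i]->cache_dirty & ~obj[i]->cache_coherent)
                        i915_gem_clflush_object(obj[i], 0);

                /* only dst (i == 1) is written by the blit */
                err = i915_request_await_object(rq, obj[i], i == 1);
                if (unlikely(err))
                        goto out_unlock;
        }

        /* ...and only then publish all the signals. */
        for (i = 0; i < ARRAY_SIZE(vma); i++) {
                err = i915_vma_move_to_active(vma[i], rq,
                                              i == 1 ? EXEC_OBJECT_WRITE : 0);
                if (unlikely(err))
                        goto out_unlock;
        }

Keeping every await ahead of every move_to_active() means that even with
src == dst the request never ends up waiting on a fence it has just
installed on the same object.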
> +
> +        if (ce->engine->emit_init_breadcrumb) {
> +                err = ce->engine->emit_init_breadcrumb(rq);
> +                if (unlikely(err))
> +                        goto out_unlock;
> +        }
> +
> +        err = ce->engine->emit_bb_start(rq,
> +                                        batch->node.start, batch->node.size,
> +                                        0);
> +out_unlock:
> +        drm_gem_unlock_reservations(objs, ARRAY_SIZE(objs), &acquire);
> +out_request:
> +        if (unlikely(err))
> +                i915_request_skip(rq, err);
> +
> +        i915_request_add(rq);
> +out_batch:
> +        i915_vma_unpin(batch);
> +        intel_engine_pool_put(pool);
> +out_unpin_dst:
> +        i915_vma_unpin(vma_dst);
> +        intel_engine_pm_put(ce->engine);
> +out_unpin_src:
> +        i915_vma_unpin(vma_src);
> +        return err;
> +}
_______________________________________________
dri-devel mailing list
dri-devel@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/dri-devel