Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes: > Be consistent, and even when we know we had used a WC, flush the mapped > object after writing into it. The flush understands the mapping type and > will only clflush if !I915_MAP_WC, but will always insert a wmb [sfence] > so that we can be sure that all writes are visible. > > v2: Add the unconditional wmb so we are know that we always flush the > writes to memory/HW at that point. > > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx> > --- > drivers/gpu/drm/i915/gem/i915_gem_object_blt.c | 8 ++++++-- > drivers/gpu/drm/i915/gem/i915_gem_pages.c | 1 + > drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c | 2 ++ > drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c | 2 ++ > drivers/gpu/drm/i915/gt/selftest_ring_submission.c | 2 ++ > drivers/gpu/drm/i915/gt/selftest_rps.c | 2 ++ > drivers/gpu/drm/i915/selftests/i915_request.c | 9 +++++++-- > 7 files changed, 22 insertions(+), 4 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c > index 2fc7737ef5f4..f457d7130491 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c > @@ -78,10 +78,12 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, > } while (rem); > > *cmd = MI_BATCH_BUFFER_END; > - intel_gt_chipset_flush(ce->vm->gt); > > + i915_gem_object_flush_map(pool->obj); > i915_gem_object_unpin_map(pool->obj); Ok, so there is explicit wmb now in flush_map even if bo is marketed as coherent. That wmb will then make the writes to flushed past that point... > > + intel_gt_chipset_flush(ce->vm->gt); and then the do the sfence for wcb. This pattern feels solid for me. If we would mark map for writes, we could do everything in unpin_map. Reviewed-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx> > + > batch = i915_vma_instance(pool->obj, ce->vm, NULL); > if (IS_ERR(batch)) { > err = PTR_ERR(batch); > @@ -289,10 +291,12 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, > } while (rem); > > *cmd = MI_BATCH_BUFFER_END; > - intel_gt_chipset_flush(ce->vm->gt); > > + i915_gem_object_flush_map(pool->obj); > i915_gem_object_unpin_map(pool->obj); > > + intel_gt_chipset_flush(ce->vm->gt); > + > batch = i915_vma_instance(pool->obj, ce->vm, NULL); > if (IS_ERR(batch)) { > err = PTR_ERR(batch); > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c > index 5d855fcd5c0f..189efcd58942 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c > @@ -391,6 +391,7 @@ void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj, > GEM_BUG_ON(range_overflows_t(typeof(obj->base.size), > offset, size, obj->base.size)); > > + wmb(); /* let all previous writes be visible to HW */ > obj->mm.dirty = true; > > if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE) > diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c > index 3f6079e1dfb6..87d7d8aa080f 100644 > --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c > +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c > @@ -158,6 +158,8 @@ static int wc_set(struct context *ctx, unsigned long offset, u32 v) > return PTR_ERR(map); > > map[offset / sizeof(*map)] = v; > + > + __i915_gem_object_flush_map(ctx->obj, offset, sizeof(*map)); > i915_gem_object_unpin_map(ctx->obj); > > return 0; > diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c > index 772d8cba7da9..226b5fa9b430 100644 > --- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c > +++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c > @@ -83,6 +83,8 @@ igt_emit_store_dw(struct i915_vma *vma, > offset += PAGE_SIZE; > } > *cmd = MI_BATCH_BUFFER_END; > + > + i915_gem_object_flush_map(obj); > i915_gem_object_unpin_map(obj); > > intel_gt_chipset_flush(vma->vm->gt); > diff --git a/drivers/gpu/drm/i915/gt/selftest_ring_submission.c b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c > index 9995faadd7e8..3350e7c995bc 100644 > --- a/drivers/gpu/drm/i915/gt/selftest_ring_submission.c > +++ b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c > @@ -54,6 +54,8 @@ static struct i915_vma *create_wally(struct intel_engine_cs *engine) > *cs++ = STACK_MAGIC; > > *cs++ = MI_BATCH_BUFFER_END; > + > + i915_gem_object_flush_map(obj); > i915_gem_object_unpin_map(obj); > > vma->private = intel_context_create(engine); /* dummy residuals */ > diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c > index bfa1a15564f7..6275d69aa9cc 100644 > --- a/drivers/gpu/drm/i915/gt/selftest_rps.c > +++ b/drivers/gpu/drm/i915/gt/selftest_rps.c > @@ -727,6 +727,7 @@ int live_rps_frequency_cs(void *arg) > > err_vma: > *cancel = MI_BATCH_BUFFER_END; > + i915_gem_object_flush_map(vma->obj); > i915_gem_object_unpin_map(vma->obj); > i915_vma_unpin(vma); > i915_vma_put(vma); > @@ -868,6 +869,7 @@ int live_rps_frequency_srm(void *arg) > > err_vma: > *cancel = MI_BATCH_BUFFER_END; > + i915_gem_object_flush_map(vma->obj); > i915_gem_object_unpin_map(vma->obj); > i915_vma_unpin(vma); > i915_vma_put(vma); > diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c > index 15b1ca9f7a01..c191976e1d15 100644 > --- a/drivers/gpu/drm/i915/selftests/i915_request.c > +++ b/drivers/gpu/drm/i915/selftests/i915_request.c > @@ -816,10 +816,12 @@ static int recursive_batch_resolve(struct i915_vma *batch) > return PTR_ERR(cmd); > > *cmd = MI_BATCH_BUFFER_END; > - intel_gt_chipset_flush(batch->vm->gt); > > + __i915_gem_object_flush_map(batch->obj, 0, sizeof(*cmd)); > i915_gem_object_unpin_map(batch->obj); > > + intel_gt_chipset_flush(batch->vm->gt); > + > return 0; > } > > @@ -1060,9 +1062,12 @@ static int live_sequential_engines(void *arg) > I915_MAP_WC); > if (!IS_ERR(cmd)) { > *cmd = MI_BATCH_BUFFER_END; > - intel_gt_chipset_flush(engine->gt); > > + __i915_gem_object_flush_map(request[idx]->batch->obj, > + 0, sizeof(*cmd)); > i915_gem_object_unpin_map(request[idx]->batch->obj); > + > + intel_gt_chipset_flush(engine->gt); > } > > i915_vma_put(request[idx]->batch); > -- > 2.20.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx