Quoting Chris Wilson (2018-06-12 13:51:35) > For symmetry, simplicity and ensuring the request is always truly idle > upon its completion, always emit the closing flush prior to emitting the > request breadcrumb. Previously, we would only emit the flush if we had > started a user batch, but this just leaves all the other paths open to > speculation (do they affect the GPU caches or not?) With mm switching, a > key requirement is that the GPU is flushed and invalidated before hand, > so for absolute safety, we want that closing flush be mandatory. > > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> No word about perf impact? The patch description matches what it does. Assuming we're not murdering any testcases; Reviewed-by: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx> Regards, Joonas > --- > drivers/gpu/drm/i915/i915_gem.c | 4 ++-- > drivers/gpu/drm/i915/i915_gem_context.c | 9 +-------- > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 ++-- > drivers/gpu/drm/i915/i915_request.c | 18 ++---------------- > drivers/gpu/drm/i915/i915_request.h | 4 +--- > drivers/gpu/drm/i915/selftests/huge_pages.c | 2 +- > .../drm/i915/selftests/i915_gem_coherency.c | 4 ++-- > .../gpu/drm/i915/selftests/i915_gem_context.c | 4 ++-- > drivers/gpu/drm/i915/selftests/i915_request.c | 2 +- > .../gpu/drm/i915/selftests/intel_hangcheck.c | 16 ++++++++-------- > drivers/gpu/drm/i915/selftests/intel_lrc.c | 2 +- > .../gpu/drm/i915/selftests/intel_workarounds.c | 2 +- > 12 files changed, 24 insertions(+), 47 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 93efd92362db..8dd4d35655af 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -3213,7 +3213,7 @@ void i915_gem_reset(struct drm_i915_private *dev_priv, > rq = i915_request_alloc(engine, > dev_priv->kernel_context); > if (!IS_ERR(rq)) > - __i915_request_add(rq, false); > + i915_request_add(rq); > } > } > > @@ -5332,7 +5332,7 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) > if (engine->init_context) > err = engine->init_context(rq); > > - __i915_request_add(rq, true); > + i915_request_add(rq); > if (err) > goto err_active; > } > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c > index b2c7ac1b074d..ef6ea4bcd773 100644 > --- a/drivers/gpu/drm/i915/i915_gem_context.c > +++ b/drivers/gpu/drm/i915/i915_gem_context.c > @@ -700,14 +700,7 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915) > i915_timeline_sync_set(rq->timeline, &prev->fence); > } > > - /* > - * Force a flush after the switch to ensure that all rendering > - * and operations prior to switching to the kernel context hits > - * memory. This should be guaranteed by the previous request, > - * but an extra layer of paranoia before we declare the system > - * idle (on suspend etc) is advisable! > - */ > - __i915_request_add(rq, true); > + i915_request_add(rq); > } > > return 0; > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > index 2d2eb3075960..60dc2a865f5f 100644 > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > @@ -921,7 +921,7 @@ static void reloc_gpu_flush(struct reloc_cache *cache) > i915_gem_object_unpin_map(cache->rq->batch->obj); > i915_gem_chipset_flush(cache->rq->i915); > > - __i915_request_add(cache->rq, true); > + i915_request_add(cache->rq); > cache->rq = NULL; > } > > @@ -2438,7 +2438,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, > trace_i915_request_queue(eb.request, eb.batch_flags); > err = eb_submit(&eb); > err_request: > - __i915_request_add(eb.request, err == 0); > + i915_request_add(eb.request); > add_to_client(eb.request, file); > > if (fences) > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c > index 9092f5464c24..e1dbb544046f 100644 > --- a/drivers/gpu/drm/i915/i915_request.c > +++ b/drivers/gpu/drm/i915/i915_request.c > @@ -1018,14 +1018,13 @@ i915_request_await_object(struct i915_request *to, > * request is not being tracked for completion but the work itself is > * going to happen on the hardware. This would be a Bad Thing(tm). > */ > -void __i915_request_add(struct i915_request *request, bool flush_caches) > +void i915_request_add(struct i915_request *request) > { > struct intel_engine_cs *engine = request->engine; > struct i915_timeline *timeline = request->timeline; > struct intel_ring *ring = request->ring; > struct i915_request *prev; > u32 *cs; > - int err; > > GEM_TRACE("%s fence %llx:%d\n", > engine->name, request->fence.context, request->fence.seqno); > @@ -1046,20 +1045,7 @@ void __i915_request_add(struct i915_request *request, bool flush_caches) > * know that it is time to use that space up. > */ > request->reserved_space = 0; > - > - /* > - * Emit any outstanding flushes - execbuf can fail to emit the flush > - * after having emitted the batchbuffer command. Hence we need to fix > - * things up similar to emitting the lazy request. The difference here > - * is that the flush _must_ happen before the next request, no matter > - * what. > - */ > - if (flush_caches) { > - err = engine->emit_flush(request, EMIT_FLUSH); > - > - /* Not allowed to fail! */ > - WARN(err, "engine->emit_flush() failed: %d!\n", err); > - } > + engine->emit_flush(request, EMIT_FLUSH); > > /* > * Record the position of the start of the breadcrumb so that > diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h > index 0e9aba53d0e4..7ee220ded9c9 100644 > --- a/drivers/gpu/drm/i915/i915_request.h > +++ b/drivers/gpu/drm/i915/i915_request.h > @@ -253,9 +253,7 @@ int i915_request_await_object(struct i915_request *to, > int i915_request_await_dma_fence(struct i915_request *rq, > struct dma_fence *fence); > > -void __i915_request_add(struct i915_request *rq, bool flush_caches); > -#define i915_request_add(rq) \ > - __i915_request_add(rq, false) > +void i915_request_add(struct i915_request *rq); > > void __i915_request_submit(struct i915_request *request); > void i915_request_submit(struct i915_request *request); > diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c > index 7846ea4a99bc..fbe4324116d7 100644 > --- a/drivers/gpu/drm/i915/selftests/huge_pages.c > +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c > @@ -1003,7 +1003,7 @@ static int gpu_write(struct i915_vma *vma, > reservation_object_unlock(vma->resv); > > err_request: > - __i915_request_add(rq, err == 0); > + i915_request_add(rq); > > return err; > } > diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c > index 340a98c0c804..a4900091ae3d 100644 > --- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c > +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c > @@ -199,7 +199,7 @@ static int gpu_set(struct drm_i915_gem_object *obj, > > cs = intel_ring_begin(rq, 4); > if (IS_ERR(cs)) { > - __i915_request_add(rq, false); > + i915_request_add(rq); > i915_vma_unpin(vma); > return PTR_ERR(cs); > } > @@ -229,7 +229,7 @@ static int gpu_set(struct drm_i915_gem_object *obj, > reservation_object_add_excl_fence(obj->resv, &rq->fence); > reservation_object_unlock(obj->resv); > > - __i915_request_add(rq, true); > + i915_request_add(rq); > > return 0; > } > diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c > index 708e8d721448..836f1af8b833 100644 > --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c > +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c > @@ -182,12 +182,12 @@ static int gpu_fill(struct drm_i915_gem_object *obj, > reservation_object_add_excl_fence(obj->resv, &rq->fence); > reservation_object_unlock(obj->resv); > > - __i915_request_add(rq, true); > + i915_request_add(rq); > > return 0; > > err_request: > - __i915_request_add(rq, false); > + i915_request_add(rq); > err_batch: > i915_vma_unpin(batch); > err_vma: > diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c > index a3a89aadeccb..f5d00332bb31 100644 > --- a/drivers/gpu/drm/i915/selftests/i915_request.c > +++ b/drivers/gpu/drm/i915/selftests/i915_request.c > @@ -466,7 +466,7 @@ empty_request(struct intel_engine_cs *engine, > goto out_request; > > out_request: > - __i915_request_add(request, err == 0); > + i915_request_add(request); > return err ? ERR_PTR(err) : request; > } > > diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c > index 390a157b37c3..fe7d3190ebfe 100644 > --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c > +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c > @@ -245,7 +245,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) > > err = emit_recurse_batch(h, rq); > if (err) { > - __i915_request_add(rq, false); > + i915_request_add(rq); > return ERR_PTR(err); > } > > @@ -318,7 +318,7 @@ static int igt_hang_sanitycheck(void *arg) > *h.batch = MI_BATCH_BUFFER_END; > i915_gem_chipset_flush(i915); > > - __i915_request_add(rq, true); > + i915_request_add(rq); > > timeout = i915_request_wait(rq, > I915_WAIT_LOCKED, > @@ -464,7 +464,7 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) > } > > i915_request_get(rq); > - __i915_request_add(rq, true); > + i915_request_add(rq); > mutex_unlock(&i915->drm.struct_mutex); > > if (!wait_until_running(&h, rq)) { > @@ -742,7 +742,7 @@ static int __igt_reset_engines(struct drm_i915_private *i915, > } > > i915_request_get(rq); > - __i915_request_add(rq, true); > + i915_request_add(rq); > mutex_unlock(&i915->drm.struct_mutex); > > if (!wait_until_running(&h, rq)) { > @@ -942,7 +942,7 @@ static int igt_wait_reset(void *arg) > } > > i915_request_get(rq); > - __i915_request_add(rq, true); > + i915_request_add(rq); > > if (!wait_until_running(&h, rq)) { > struct drm_printer p = drm_info_printer(i915->drm.dev); > @@ -1037,7 +1037,7 @@ static int igt_reset_queue(void *arg) > } > > i915_request_get(prev); > - __i915_request_add(prev, true); > + i915_request_add(prev); > > count = 0; > do { > @@ -1051,7 +1051,7 @@ static int igt_reset_queue(void *arg) > } > > i915_request_get(rq); > - __i915_request_add(rq, true); > + i915_request_add(rq); > > /* > * XXX We don't handle resetting the kernel context > @@ -1184,7 +1184,7 @@ static int igt_handle_error(void *arg) > } > > i915_request_get(rq); > - __i915_request_add(rq, true); > + i915_request_add(rq); > > if (!wait_until_running(&h, rq)) { > struct drm_printer p = drm_info_printer(i915->drm.dev); > diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c > index 0b6da08c8cae..ea27c7cfbf96 100644 > --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c > +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c > @@ -155,7 +155,7 @@ spinner_create_request(struct spinner *spin, > > err = emit_recurse_batch(spin, rq, arbitration_command); > if (err) { > - __i915_request_add(rq, false); > + i915_request_add(rq); > return ERR_PTR(err); > } > > diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c > index f1cfb0fb6bea..e1ea2d2bedd2 100644 > --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c > +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c > @@ -75,7 +75,7 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine) > i915_gem_object_get(result); > i915_gem_object_set_active_reference(result); > > - __i915_request_add(rq, true); > + i915_request_add(rq); > i915_vma_unpin(vma); > > return result; > -- > 2.17.1 > _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx