On Thu, Jul 24, 2014 at 05:04:29PM +0100, Thomas Daniel wrote: > From: Oscar Mateo <oscar.mateo@xxxxxxxxx> > > On a previous iteration of this patch, I created an Execlists > version of __i915_add_request and abstracted it away as a > vfunc. Daniel Vetter wondered then why that was needed: > > "with the clean split in command submission I expect every > function to know whether it'll submit to an lrc (everything in > intel_lrc.c) or whether it'll submit to a legacy ring (existing > code), so I don't see a need for an add_request vfunc." > > The honest, hairy truth is that this patch is the glue keeping > the whole logical ring puzzle together: Oops, I didn't spot this and it's indeed not terribly pretty. > > - i915_add_request is used by intel_ring_idle, which in turn is > used by i915_gpu_idle, which in turn is used in several places > inside the eviction and gtt codes. This should probably be folded in with the lrc specific version of stop_rings and so should work out. > - Also, it is used by i915_gem_check_olr, which is littered all > over i915_gem.c We now always preallocate the request struct, so olr is officially dead. Well almost, except for non-execbuf stuff that we emit through the rings. Which is nothing for lrc/execlist mode. Also there's the icky-bitty problem with ringbuf->ctx which makes this patch not apply any more. I think we need to revise or at least discuss a bit. > - ... > > If I were to duplicate all the code that directly or indirectly > uses __i915_add_request, I'd end up creating a separate driver. > > To show the differences between the existing legacy version and > the new Execlists one, this time I have special-cased > __i915_add_request instead of adding an add_request vfunc. I > hope this helps to untangle this Gordian knot. 
> > Signed-off-by: Oscar Mateo <oscar.mateo@xxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_gem.c | 72 ++++++++++++++++++++++++++++---------- > drivers/gpu/drm/i915/intel_lrc.c | 30 +++++++++++++--- > drivers/gpu/drm/i915/intel_lrc.h | 1 + > 3 files changed, 80 insertions(+), 23 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 9560b40..1c83b9c 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -2327,10 +2327,21 @@ int __i915_add_request(struct intel_engine_cs *ring, > { > struct drm_i915_private *dev_priv = ring->dev->dev_private; > struct drm_i915_gem_request *request; > + struct intel_ringbuffer *ringbuf; > u32 request_ring_position, request_start; > int ret; > > - request_start = intel_ring_get_tail(ring->buffer); > + request = ring->preallocated_lazy_request; > + if (WARN_ON(request == NULL)) > + return -ENOMEM; > + > + if (i915.enable_execlists) { > + struct intel_context *ctx = request->ctx; > + ringbuf = ctx->engine[ring->id].ringbuf; > + } else > + ringbuf = ring->buffer; > + > + request_start = intel_ring_get_tail(ringbuf); > /* > * Emit any outstanding flushes - execbuf can fail to emit the flush > * after having emitted the batchbuffer command. Hence we need to fix > @@ -2338,24 +2349,32 @@ int __i915_add_request(struct intel_engine_cs *ring, > * is that the flush _must_ happen before the next request, no matter > * what. 
> */ > - ret = intel_ring_flush_all_caches(ring); > - if (ret) > - return ret; > - > - request = ring->preallocated_lazy_request; > - if (WARN_ON(request == NULL)) > - return -ENOMEM; > + if (i915.enable_execlists) { > + ret = logical_ring_flush_all_caches(ringbuf); > + if (ret) > + return ret; > + } else { > + ret = intel_ring_flush_all_caches(ring); > + if (ret) > + return ret; > + } > > /* Record the position of the start of the request so that > * should we detect the updated seqno part-way through the > * GPU processing the request, we never over-estimate the > * position of the head. > */ > - request_ring_position = intel_ring_get_tail(ring->buffer); > + request_ring_position = intel_ring_get_tail(ringbuf); > > - ret = ring->add_request(ring); > - if (ret) > - return ret; > + if (i915.enable_execlists) { > + ret = ring->emit_request(ringbuf); > + if (ret) > + return ret; > + } else { > + ret = ring->add_request(ring); > + if (ret) > + return ret; > + } > > request->seqno = intel_ring_get_seqno(ring); > request->ring = ring; > @@ -2370,12 +2389,14 @@ int __i915_add_request(struct intel_engine_cs *ring, > */ > request->batch_obj = obj; > > - /* Hold a reference to the current context so that we can inspect > - * it later in case a hangcheck error event fires. > - */ > - request->ctx = ring->last_context; > - if (request->ctx) > - i915_gem_context_reference(request->ctx); > + if (!i915.enable_execlists) { > + /* Hold a reference to the current context so that we can inspect > + * it later in case a hangcheck error event fires. 
> + */ > + request->ctx = ring->last_context; > + if (request->ctx) > + i915_gem_context_reference(request->ctx); > + } > > request->emitted_jiffies = jiffies; > list_add_tail(&request->list, &ring->request_list); > @@ -2630,6 +2651,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring) > > while (!list_empty(&ring->request_list)) { > struct drm_i915_gem_request *request; > + struct intel_ringbuffer *ringbuf; > > request = list_first_entry(&ring->request_list, > struct drm_i915_gem_request, > @@ -2639,12 +2661,24 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring) > break; > > trace_i915_gem_request_retire(ring, request->seqno); > + > + /* This is one of the few common intersection points > + * between legacy ringbuffer submission and execlists: > + * we need to tell them apart in order to find the correct > + * ringbuffer to which the request belongs to. > + */ > + if (i915.enable_execlists) { > + struct intel_context *ctx = request->ctx; > + ringbuf = ctx->engine[ring->id].ringbuf; > + } else > + ringbuf = ring->buffer; > + > /* We know the GPU must have read the request to have > * sent us the seqno + interrupt, so use the position > * of tail of the request to update the last known position > * of the GPU head. 
> */ > - ring->buffer->last_retired_head = request->tail; > + ringbuf->last_retired_head = request->tail; > > i915_gem_free_request(request); > } > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > index 5dd63d6..dcf59c6 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -106,6 +106,22 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring) > /* TODO */ > } > > +int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf) > +{ > + struct intel_engine_cs *ring = ringbuf->ring; > + int ret; > + > + if (!ring->gpu_caches_dirty) > + return 0; > + > + ret = ring->emit_flush(ringbuf, 0, I915_GEM_GPU_DOMAINS); > + if (ret) > + return ret; > + > + ring->gpu_caches_dirty = false; > + return 0; > +} > + > void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf) > { > intel_logical_ring_advance(ringbuf); > @@ -116,7 +132,8 @@ void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf) > /* TODO: how to submit a context to the ELSP is not here yet */ > } > > -static int logical_ring_alloc_seqno(struct intel_engine_cs *ring) > +static int logical_ring_alloc_seqno(struct intel_engine_cs *ring, > + struct intel_context *ctx) > { > if (ring->outstanding_lazy_seqno) > return 0; > @@ -128,6 +145,13 @@ static int logical_ring_alloc_seqno(struct intel_engine_cs *ring) > if (request == NULL) > return -ENOMEM; > > + /* Hold a reference to the context this request belongs to > + * (we will need it when the time comes to emit/retire the > + * request). 
> + */ > + request->ctx = ctx; > + i915_gem_context_reference(request->ctx); > + > ring->preallocated_lazy_request = request; > } > > @@ -165,8 +189,6 @@ static int logical_ring_wait_request(struct intel_ringbuffer *ringbuf, int bytes > if (ret) > return ret; > > - /* TODO: make sure we update the right ringbuffer's last_retired_head > - * when retiring requests */ > i915_gem_retire_requests_ring(ring); > ringbuf->head = ringbuf->last_retired_head; > ringbuf->last_retired_head = -1; > @@ -291,7 +313,7 @@ int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords) > return ret; > > /* Preallocate the olr before touching the ring */ > - ret = logical_ring_alloc_seqno(ring); > + ret = logical_ring_alloc_seqno(ring, ringbuf->ctx); Ok, this is hairy. Really hairy, since this uses ringbuf->ctx. Not sure we really want this or need this. > if (ret) > return ret; > > diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h > index 16798b6..696e09e 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.h > +++ b/drivers/gpu/drm/i915/intel_lrc.h > @@ -29,6 +29,7 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring); > void intel_logical_ring_cleanup(struct intel_engine_cs *ring); > int intel_logical_rings_init(struct drm_device *dev); > > +int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf); > void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf); > static inline void intel_logical_ring_advance(struct intel_ringbuffer *ringbuf) > { > -- > 1.7.9.5 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx