On Thu, Nov 26, 2015 at 09:19:44AM +0000, Nick Hoath wrote:
> On 26/11/2015 08:48, Daniel Vetter wrote:
> >On Wed, Nov 25, 2015 at 05:02:44PM +0200, Mika Kuoppala wrote:
> >>Nick Hoath <nicholas.hoath@xxxxxxxxx> writes:
> >>
> >>>Use the first retired request on a new context to unpin
> >>>the old context. This ensures that the hw context remains
> >>>bound until it has been written back to by the GPU.
> >>>Now that the context is pinned until later in the request/context
> >>>lifecycle, it no longer needs to be pinned from context_queue to
> >>>retire_requests.
> >>>
> >>>v2: Moved the new pin to cover GuC submission (Alex Dai)
> >>>    Moved the new unpin to request_retire to fix coverage leak
> >>>v3: Added switch to default context if freeing a still pinned
> >>>    context just in case the hw was actually still using it
> >>>v4: Unwrapped context unpin to allow calling without a request
> >>>v5: Only create a switch to idle context if the ring doesn't
> >>>    already have a request pending on it (Alex Dai)
> >>>    Rename unsaved to dirty to avoid double negatives (Dave Gordon)
> >>>    Changed _no_req postfix to __ prefix for consistency (Dave Gordon)
> >>>    Split out per engine cleanup from context_free as it
> >>>    was getting unwieldy
> >>>    Corrected locking (Dave Gordon)
> >>>
> >>>Signed-off-by: Nick Hoath <nicholas.hoath@xxxxxxxxx>
> >>>Issue: VIZ-4277
> >>>Cc: Daniel Vetter <daniel.vetter@xxxxxxxx>
> >>>Cc: David Gordon <david.s.gordon@xxxxxxxxx>
> >>>Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> >>>Cc: Alex Dai <yu.dai@xxxxxxxxx>
> >>>---
> >>> drivers/gpu/drm/i915/i915_drv.h  |   1 +
> >>> drivers/gpu/drm/i915/i915_gem.c  |   3 +
> >>> drivers/gpu/drm/i915/intel_lrc.c | 124 +++++++++++++++++++++++++++++++--------
> >>> drivers/gpu/drm/i915/intel_lrc.h |   1 +
> >>> 4 files changed, 105 insertions(+), 24 deletions(-)
> >>>
> >>>diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> >>>index d5cf30b..e82717a 100644
> >>>--- a/drivers/gpu/drm/i915/i915_drv.h
> >>>+++ b/drivers/gpu/drm/i915/i915_drv.h
> >>>@@ -889,6 +889,7 @@ struct intel_context {
> >>> 	struct {
> >>> 		struct drm_i915_gem_object *state;
> >>> 		struct intel_ringbuffer *ringbuf;
> >>>+		bool dirty;
> >>> 		int pin_count;
> >>> 	} engine[I915_NUM_RINGS];
> >>>
> >>>diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> >>>index e955499..3829bc1 100644
> >>>--- a/drivers/gpu/drm/i915/i915_gem.c
> >>>+++ b/drivers/gpu/drm/i915/i915_gem.c
> >>>@@ -1354,6 +1354,9 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
> >>> {
> >>> 	trace_i915_gem_request_retire(request);
> >>>
> >>>+	if (i915.enable_execlists)
> >>>+		intel_lr_context_complete_check(request);
> >>>+
> >>> 	/* We know the GPU must have read the request to have
> >>> 	 * sent us the seqno + interrupt, so use the position
> >>> 	 * of tail of the request to update the last known position
> >>>diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> >>>index 06180dc..03d5bca 100644
> >>>--- a/drivers/gpu/drm/i915/intel_lrc.c
> >>>+++ b/drivers/gpu/drm/i915/intel_lrc.c
> >>>@@ -566,9 +566,6 @@ static int execlists_context_queue(struct drm_i915_gem_request *request)
> >>> 	struct drm_i915_gem_request *cursor;
> >>> 	int num_elements = 0;
> >>>
> >>>-	if (request->ctx != ring->default_context)
> >>>-		intel_lr_context_pin(request);
> >>>-
> >>> 	i915_gem_request_reference(request);
> >>>
> >>> 	spin_lock_irq(&ring->execlist_lock);
> >>>@@ -732,6 +729,13 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
> >>> 	if (intel_ring_stopped(ring))
> >>> 		return;
> >>>
> >>>+	if (request->ctx != ring->default_context) {
> >>>+		if (!request->ctx->engine[ring->id].dirty) {
> >>>+			intel_lr_context_pin(request);
> >>>+			request->ctx->engine[ring->id].dirty = true;
> >>>+		}
> >>>+	}
> >>>+
> >>> 	if (dev_priv->guc.execbuf_client)
> >>> 		i915_guc_submit(dev_priv->guc.execbuf_client, request);
> >>> 	else
> >>>@@ -958,12 +962,6 @@ void intel_execlists_retire_requests(struct intel_engine_cs *ring)
> >>> 	spin_unlock_irq(&ring->execlist_lock);
> >>>
> >>> 	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
> >>>-		struct intel_context *ctx = req->ctx;
> >>>-		struct drm_i915_gem_object *ctx_obj =
> >>>-				ctx->engine[ring->id].state;
> >>>-
> >>>-		if (ctx_obj && (ctx != ring->default_context))
> >>>-			intel_lr_context_unpin(req);
> >>> 		list_del(&req->execlist_link);
> >>> 		i915_gem_request_unreference(req);
> >>> 	}
> >>>@@ -1058,21 +1056,39 @@ reset_pin_count:
> >>> 	return ret;
> >>> }
> >>>
> >>>-void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
> >>>+static void __intel_lr_context_unpin(struct intel_engine_cs *ring,
> >>>+				     struct intel_context *ctx)
> >>> {
> >>>-	struct intel_engine_cs *ring = rq->ring;
> >>>-	struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
> >>>-	struct intel_ringbuffer *ringbuf = rq->ringbuf;
> >>>-
> >>>+	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
> >>>+	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
> >>> 	if (ctx_obj) {
> >>> 		WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
> >>>-		if (--rq->ctx->engine[ring->id].pin_count == 0) {
> >>>+		if (--ctx->engine[ring->id].pin_count == 0) {
> >>> 			intel_unpin_ringbuffer_obj(ringbuf);
> >>> 			i915_gem_object_ggtt_unpin(ctx_obj);
> >>> 		}
> >>> 	}
> >>> }
> >>>
> >>>+void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
> >>>+{
> >>>+	__intel_lr_context_unpin(rq->ring, rq->ctx);
> >>>+}
> >>>+
> >>>+void intel_lr_context_complete_check(struct drm_i915_gem_request *req)
> >>>+{
> >>>+	struct intel_engine_cs *ring = req->ring;
> >>>+
> >>>+	if (ring->last_context && ring->last_context != req->ctx &&
> >>>+	    ring->last_context->engine[ring->id].dirty) {
> >>>+		__intel_lr_context_unpin(
> >>>+				ring,
> >>>+				ring->last_context);
> >>>+		ring->last_context->engine[ring->id].dirty = false;
> >>>+	}
> >>>+	ring->last_context = req->ctx;
> >>
> >>What ensures that the last_context stays alive?
> >
> >The other part that's missing compared to the legacy context cycling logic
> >is move_to_active. Without that the shrinker can't eat into retired
> >contexts, which renders this exercise fairly moot imo.
> >
> >The overall idea is that since i915 does dynamic memory management, and
> >that infects everything in the code that deals with gpu objects, we need
> >to make sure that everyone is using the same logic to cycle through
> >active objects. Otherwise the shrinker will be an unmaintainable hydra
> >with per-feature special cases.
> >-Daniel
> 
> This is the first part of the VIZ-4277 fix changeset that I'm working on.
> It has been 'fasttracked' as it is needed to fix a lockup with GuC
> submission. If you like, I can update the commit message to explain that?

Yeah, if this is a minimal bugfix for some issue then this definitely
should be mentioned in the commit message, including a precise description
of what and how exactly the current code blows up.
-Daniel
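
For readers without the legacy code to hand: the "context cycling logic"
referred to above is the do_switch() path in i915_gem_context.c. A condensed
sketch of the relevant part (paraphrased from the driver of that era; helper
and field names approximate, pinning and error handling omitted):

	/* Legacy (ring-based) context switch: the outgoing context's backing
	 * object goes onto the active list against the switching request, so
	 * the retire path and the shrinker see it like any other GPU object.
	 */
	static int do_switch(struct drm_i915_gem_request *req)
	{
		struct intel_context *to = req->ctx;
		struct intel_engine_cs *ring = req->ring;
		struct intel_context *from = ring->last_context;

		/* ... MI_SET_CONTEXT emitted against req here ... */

		if (from != NULL) {
			/* The hw may keep reading the old state until the
			 * switch completes, so retire it via the active list
			 * rather than dropping it outright.
			 */
			i915_vma_move_to_active(
				i915_gem_obj_to_ggtt(from->legacy_hw_ctx.rcs_state),
				req);
			i915_gem_object_ggtt_unpin(from->legacy_hw_ctx.rcs_state);
			i915_gem_context_unreference(from);
		}

		i915_gem_context_reference(to);	/* keeps last_context alive */
		ring->last_context = to;
		return 0;
	}

On the legacy side, the reference taken on 'to' is what keeps last_context
alive until the next switch drops it, and move_to_active is what lets the
shrinker reclaim the old context image once the switching request retires --
the part noted above as missing from the execlists version.
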
> >
> >>
> >>>+}
> >>>+
> >>> static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
> >>> {
> >>> 	int ret, i;
> >>>@@ -2367,6 +2383,62 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
> >>> }
> >>>
> >>> /**
> >>>+ * intel_lr_context_clean_ring() - clean the ring specific parts of an LRC
> >>>+ * @ctx: the LR context being freed.
> >>>+ * @ring: the engine being cleaned
> >>>+ * @ctx_obj: the hw context being unreferenced
> >>>+ * @ringbuf: the ringbuf being freed
> >>>+ *
> >>>+ * Take care of cleaning up the per-engine backing
> >>>+ * objects and the logical ringbuffer.
> >>>+ */
> >>>+static void
> >>>+intel_lr_context_clean_ring(struct intel_context *ctx,
> >>>+			    struct intel_engine_cs *ring,
> >>>+			    struct drm_i915_gem_object *ctx_obj,
> >>>+			    struct intel_ringbuffer *ringbuf)
> >>>+{
> >>>+	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
> >>>+
> >>>+	if (ctx == ring->default_context) {
> >>>+		intel_unpin_ringbuffer_obj(ringbuf);
> >>>+		i915_gem_object_ggtt_unpin(ctx_obj);
> >>>+	}
> >>>+
> >>>+	if (ctx->engine[ring->id].dirty) {
> >>>+		struct drm_i915_gem_request *req = NULL;
> >>>+
> >>>+		/**
> >>>+		 * If there is already a request pending on
> >>>+		 * this ring, wait for that to complete,
> >>>+		 * otherwise create a switch to idle request
> >>>+		 */
> >>>+		if (list_empty(&ring->request_list)) {
> >>>+			int ret;
> >>>+
> >>>+			ret = i915_gem_request_alloc(
> >>>+					ring,
> >>>+					ring->default_context,
> >>>+					&req);
> >>>+			if (!ret)
> >>>+				i915_add_request(req);
> >>>+			else
> >>>+				DRM_DEBUG("Failed to ensure context saved");
> >>>+		} else {
> >>>+			req = list_first_entry(
> >>>+					&ring->request_list,
> >>>+					typeof(*req), list);
> >>>+		}
> >>>+		if (req)
> >>>+			i915_wait_request(req);
> >>>+	}
> >>>+
> >>>+	WARN_ON(ctx->engine[ring->id].pin_count);
> >>>+	intel_ringbuffer_free(ringbuf);
> >>>+	drm_gem_object_unreference(&ctx_obj->base);
> >>>+}
> >>>+
> >>>+/**
> >>> * intel_lr_context_free() - free the LRC specific bits of a context
> >>> * @ctx: the LR context to free.
> >>> *
> >>>@@ -2378,21 +2450,18 @@ void intel_lr_context_free(struct intel_context *ctx)
> >>> {
> >>> 	int i;
> >>>
> >>>-	for (i = 0; i < I915_NUM_RINGS; i++) {
> >>>+	for (i = 0; i < I915_NUM_RINGS; ++i) {
> >>> 		struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state;
> >>>
> >>>-		if (ctx_obj) {
> >>>+		if (ctx->engine[i].state) {
> >>
> >>++i and the above are superfluous?
> >>
> >>-Mika
> >>
> >>> 			struct intel_ringbuffer *ringbuf =
> >>> 					ctx->engine[i].ringbuf;
> >>> 			struct intel_engine_cs *ring = ringbuf->ring;
> >>>
> >>>-			if (ctx == ring->default_context) {
> >>>-				intel_unpin_ringbuffer_obj(ringbuf);
> >>>-				i915_gem_object_ggtt_unpin(ctx_obj);
> >>>-			}
> >>>-			WARN_ON(ctx->engine[ring->id].pin_count);
> >>>-			intel_ringbuffer_free(ringbuf);
> >>>-			drm_gem_object_unreference(&ctx_obj->base);
> >>>+			intel_lr_context_clean_ring(ctx,
> >>>+						    ring,
> >>>+						    ctx_obj,
> >>>+						    ringbuf);
> >>> 		}
> >>> 	}
> >>> }
> >>>@@ -2554,5 +2623,12 @@ void intel_lr_context_reset(struct drm_device *dev,
> >>>
> >>> 		ringbuf->head = 0;
> >>> 		ringbuf->tail = 0;
> >>>+
> >>>+		if (ctx->engine[ring->id].dirty) {
> >>>+			__intel_lr_context_unpin(
> >>>+					ring,
> >>>+					ctx);
> >>>+			ctx->engine[ring->id].dirty = false;
> >>>+		}
> >>> 	}
> >>> }
> >>>diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> >>>index 4e60d54..cbc42b8 100644
> >>>--- a/drivers/gpu/drm/i915/intel_lrc.h
> >>>+++ b/drivers/gpu/drm/i915/intel_lrc.h
> >>>@@ -86,6 +86,7 @@ void intel_lr_context_reset(struct drm_device *dev,
> >>> 			struct intel_context *ctx);
> >>> uint64_t intel_lr_context_descriptor(struct intel_context *ctx,
> >>> 				     struct intel_engine_cs *ring);
> >>>+void intel_lr_context_complete_check(struct drm_i915_gem_request *req);
> >>>
> >>> /* Execlists */
> >>> int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists);
> >>>--
> >>>1.9.1

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx