On 19/04/16 07:49, Chris Wilson wrote:
> If we move the release of the GEM request (i.e. decoupling it from the
> various lists used for client and context tracking) after it is complete
> (either by the GPU retiring the request, or by the caller cancelling the
> request), we can remove the requirement that the final unreference of
> the GEM request need to be under the struct_mutex.
>
> v2,v3: Rebalance execlists by moving the context unpinning.
>
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> ---
>  drivers/gpu/drm/i915/i915_gem.c          |  4 ++--
>  drivers/gpu/drm/i915/i915_gem_request.c  | 25 ++++++++++---------------
>  drivers/gpu/drm/i915/i915_gem_request.h  | 14 --------------
>  drivers/gpu/drm/i915/intel_breadcrumbs.c |  2 +-
>  drivers/gpu/drm/i915/intel_display.c     |  2 +-
>  drivers/gpu/drm/i915/intel_lrc.c         |  4 ----
>  drivers/gpu/drm/i915/intel_pm.c          |  2 +-
>  7 files changed, 15 insertions(+), 38 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 4057c0febccd..1f5434f7a294 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2542,7 +2542,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>                  ret = __i915_wait_request(req[i], true,
>                                            args->timeout_ns > 0 ? &args->timeout_ns : NULL,
>                                            to_rps_client(file));
> -                i915_gem_request_unreference__unlocked(req[i]);
> +                i915_gem_request_unreference(req[i]);
>          }
>          return ret;
>
> @@ -3548,7 +3548,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
>                  return 0;
>
>          ret = __i915_wait_request(target, true, NULL, NULL);
> -        i915_gem_request_unreference__unlocked(target);
> +        i915_gem_request_unreference(target);
>
>          return ret;
>  }
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 8d7c415f1896..c14dca3d8b87 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -250,6 +250,7 @@ i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
>  static void i915_gem_request_retire(struct drm_i915_gem_request *request)
>  {
>          trace_i915_gem_request_retire(request);
> +        list_del_init(&request->list);
>
>          /* We know the GPU must have read the request to have
>           * sent us the seqno + interrupt, so use the position
> @@ -261,9 +262,15 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
>           */
>          request->ringbuf->last_retired_head = request->postfix;
>
> -        list_del_init(&request->list);
>          i915_gem_request_remove_from_client(request);
>
> +        if (request->pinned_context) {
> +                if (i915.enable_execlists)
> +                        intel_lr_context_unpin(request->pinned_context,
> +                                               request->engine);
> +        }
> +
> +        i915_gem_context_unreference(request->ctx);
>          i915_gem_request_unreference(request);
>  }
>
> @@ -636,19 +643,7 @@ int i915_wait_request(struct drm_i915_gem_request *req)
>
>  void i915_gem_request_free(struct kref *req_ref)
>  {
> -        struct drm_i915_gem_request *req = container_of(req_ref,
> -                                                        typeof(*req), ref);
> -        struct intel_context *ctx = req->ctx;
> -
> -        if (req->file_priv)
> -                i915_gem_request_remove_from_client(req);
> -
> -        if (req->pinned_context) {
> -                if (i915.enable_execlists)
> -                        intel_lr_context_unpin(req->pinned_context,
> -                                               req->engine);
> -        }
> -
> -        i915_gem_context_unreference(ctx);
> +        struct drm_i915_gem_request *req =
> +                container_of(req_ref, typeof(*req), ref);
>          kmem_cache_free(to_i915(req)->requests, req);
>  }
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> index 389813cbc19a..48bff7dc4f90 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.h
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -170,23 +170,9 @@ i915_gem_request_reference(struct drm_i915_gem_request *req)
>  static inline void
>  i915_gem_request_unreference(struct drm_i915_gem_request *req)
>  {
> -        WARN_ON(!mutex_is_locked(&req->engine->dev->struct_mutex));
>          kref_put(&req->ref, i915_gem_request_free);
>  }
>
> -static inline void
> -i915_gem_request_unreference__unlocked(struct drm_i915_gem_request *req)
> -{
> -        struct drm_device *dev;
> -
> -        if (!req)
> -                return;
> -
> -        dev = req->engine->dev;
> -        if (kref_put_mutex(&req->ref, i915_gem_request_free, &dev->struct_mutex))
> -                mutex_unlock(&dev->struct_mutex);
> -}
> -
>  static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
>                                             struct drm_i915_gem_request *src)
>  {
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index 79f175853548..d7d19e17318e 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c

! :)

> @@ -379,7 +379,7 @@ static int intel_breadcrumbs_signaler(void *arg)
>                           */
>                          intel_engine_remove_wait(engine, &signal->wait);
>
> -                        i915_gem_request_unreference__unlocked(signal->request);
> +                        i915_gem_request_unreference(signal->request);
>
>                          /* Find the next oldest signal. Note that as we have
>                           * not been holding the lock, another client may
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index caff656417c2..07dfd55fff91 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -11370,7 +11370,7 @@ static void intel_mmio_flip_work_func(struct work_struct *work)
>                  WARN_ON(__i915_wait_request(mmio_flip->req,
>                                              false, NULL,
>                                              &mmio_flip->i915->rps.mmioflips));
> -                i915_gem_request_unreference__unlocked(mmio_flip->req);
> +                i915_gem_request_unreference(mmio_flip->req);
>          }
>
>          /* For framebuffer backed by dmabuf, wait for fence */
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 0e55f206e592..0eaa74fab87b 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -587,7 +587,6 @@ static void execlists_context_queue(struct drm_i915_gem_request *request)
>          struct drm_i915_gem_request *cursor;
>          int num_elements = 0;
>
> -        intel_lr_context_pin(request->ctx, request->engine);

I would prefer this step to be a separate patch from the retire/free
reorg. Also, in my testing, another patch was required before this step
to make it work. Basically:

>From 848df37ef90dfb7c7adbf59d155c39d8e4521b8e Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
Date: Fri, 15 Apr 2016 16:13:26 +0100
Subject: [PATCH 6/7] drm/i915: Store LRC hardware id in the context

This way in the following patch we can disconnect requests from
contexts.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_drv.h  | 3 +++
 drivers/gpu/drm/i915/intel_lrc.c | 3 ++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 79c23108cc35..d85c03425c54 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2334,6 +2334,9 @@ struct drm_i915_gem_request {
 
         /** Execlists no. of times this request has been sent to the ELSP */
         int elsp_submitted;
+
+        /** Execlists context hardware id. */
+        unsigned ctx_hw_id;
 };
 
 struct drm_i915_gem_request * __must_check
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 8f8da1b01458..871f399f84f6 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -475,7 +475,7 @@ execlists_check_remove_request(struct intel_engine_cs *engine, u32 request_id)
         if (!head_req)
                 return 0;
 
-        if (unlikely(head_req->ctx->hw_id != request_id))
+        if (unlikely(head_req->ctx_hw_id != request_id))
                 return 0;
 
         WARN(head_req->elsp_submitted == 0, "Never submitted head request\n");
@@ -614,6 +614,7 @@ static void execlists_context_queue(struct drm_i915_gem_request *request)
         }
 
         list_add_tail(&request->execlist_link, &engine->execlist_queue);
+        request->ctx_hw_id = request->ctx->hw_id;
 
         if (num_elements == 0)
                 execlists_context_unqueue(engine);
-- 
1.9.1

Otherwise I was hitting requests with unpinned LRCs on the execlists
queue, so check_remove was failing to spot progress. I had a theory on
why that was happening but I am not sure about it any longer. I will try
to stress it a bit with your series and see if I can hit the problem.

But either way, I think it would be better to split the removal of the
extra execlists pin/unpin out of this patch.

>          i915_gem_request_reference(request);
>
>          spin_lock_bh(&engine->execlist_lock);
> @@ -1006,9 +1005,6 @@ void intel_execlists_retire_requests(struct intel_engine_cs *engine)
>          spin_unlock_bh(&engine->execlist_lock);
>
>          list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
> -                if (req->pinned_context)
> -                        intel_lr_context_unpin(req->pinned_context, engine);
> -
>                  list_del(&req->execlist_link);
>                  i915_gem_request_unreference(req);
>          }
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index b964c448bc03..565b725cd817 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -7364,7 +7364,7 @@ static void __intel_rps_boost_work(struct work_struct *work)
>          if (!i915_gem_request_completed(req))
>                  gen6_rps_boost(to_i915(req), NULL, req->emitted_jiffies);
>
> -        i915_gem_request_unreference__unlocked(req);
> +        i915_gem_request_unreference(req);
>          kfree(boost);
>  }
>

Regards,

Tvrtko
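
For reference, below is a minimal, self-contained userspace sketch of the
locking pattern the commit message relies on. This is not i915 code and
every name in it (mock_request, engine_lock, and so on) is invented for
illustration: retire unlinks the request from its tracking list while the
lock is held, so the final unreference only has to free memory and no
longer needs struct_mutex or the kref_put_mutex() dance that the removed
__unlocked variant used.

/*
 * Userspace sketch only, not i915 code: retire unlinks under the lock,
 * so the last reference can be dropped without taking any lock.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

struct list_node { struct list_node *prev, *next; };

struct mock_request {
        atomic_int ref;
        struct list_node link;          /* protected by engine_lock */
};

static pthread_mutex_t engine_lock = PTHREAD_MUTEX_INITIALIZER;

/*
 * Drop a reference. Freeing needs no lock because retire has already
 * removed every trace of the request from the shared list.
 */
static void mock_request_put(struct mock_request *rq)
{
        if (atomic_fetch_sub(&rq->ref, 1) == 1)
                free(rq);
}

/* Caller holds engine_lock: unlink from tracking, drop the list's reference. */
static void mock_request_retire(struct mock_request *rq)
{
        rq->link.prev->next = rq->link.next;
        rq->link.next->prev = rq->link.prev;
        rq->link.prev = rq->link.next = &rq->link;
        mock_request_put(rq);
}

int main(void)
{
        struct list_node queue = { &queue, &queue };
        struct mock_request *rq = calloc(1, sizeof(*rq));

        if (!rq)
                return 1;
        atomic_init(&rq->ref, 2);       /* one ref for the queue, one for us */

        pthread_mutex_lock(&engine_lock);
        rq->link.prev = &queue;         /* "submit": link onto the queue */
        rq->link.next = &queue;
        queue.next = queue.prev = &rq->link;
        mock_request_retire(rq);        /* "retire": unlink under the lock */
        pthread_mutex_unlock(&engine_lock);

        mock_request_put(rq);           /* final put: no lock required */
        return 0;
}

The same split between "remove from lists at retire time" and "free at
final put" is what allows the patch above to delete
i915_gem_request_unreference__unlocked() and the WARN_ON on struct_mutex.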