On 11/02/16 21:00, Chris Wilson wrote: > On Thu, Feb 11, 2016 at 06:03:09PM +0000, Tvrtko Ursulin wrote: >> From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> >> >> Only caller to get_context_status ensures read pointer stays in >> range so the WARN is impossible. Also, if the WARN would be >> triggered by a hypothetical new caller stale status would be >> returned to them. >> >> Maybe it is better to wrap the pointer in the function itself >> then to avoid both and even results in smaller code. >> >> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> >> --- >> drivers/gpu/drm/i915/intel_lrc.c | 15 ++++++--------- >> 1 file changed, 6 insertions(+), 9 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c >> index 89eb892df4ae..951f1e6af947 100644 >> --- a/drivers/gpu/drm/i915/intel_lrc.c >> +++ b/drivers/gpu/drm/i915/intel_lrc.c >> @@ -507,17 +507,16 @@ static bool execlists_check_remove_request(struct intel_engine_cs *ring, >> return false; >> } >> >> -static void get_context_status(struct intel_engine_cs *ring, >> - u8 read_pointer, >> - u32 *status, u32 *context_id) >> +static u32 get_context_status(struct intel_engine_cs *ring, u8 read_pointer, >> + u32 *context_id) >> { >> struct drm_i915_private *dev_priv = ring->dev->dev_private; >> >> - if (WARN_ON(read_pointer >= GEN8_CSB_ENTRIES)) >> - return; >> + read_pointer %= GEN8_CSB_ENTRIES; >> >> - *status = I915_READ(RING_CONTEXT_STATUS_BUF_LO(ring, read_pointer)); >> *context_id = I915_READ(RING_CONTEXT_STATUS_BUF_HI(ring, read_pointer)); > > Micro-optimising hat says not to even do the uncached, spinlocked mmio > read when not required. You mean move the forcewake grab out from the elsp write to cover all mmio reads? These two patches make the irq handler around 3.7% smaller, and moving the forcewake/uncore lock shrinks it by 1% more. Must be faster as well, if someone could measure it. 
:) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 1e7ccd0a6573..77a64008f53d 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -375,8 +375,6 @@ static void execlists_elsp_write(struct drm_i915_gem_request *rq0, rq0->elsp_submitted++; /* You must always write both descriptors in the order below. */ - spin_lock(&dev_priv->uncore.lock); - intel_uncore_forcewake_get__locked(dev_priv, FORCEWAKE_ALL); I915_WRITE_FW(RING_ELSP(ring), upper_32_bits(desc[1])); I915_WRITE_FW(RING_ELSP(ring), lower_32_bits(desc[1])); @@ -386,8 +384,6 @@ static void execlists_elsp_write(struct drm_i915_gem_request *rq0, /* ELSP is a wo register, use another nearby reg for posting */ POSTING_READ_FW(RING_EXECLIST_STATUS_LO(ring)); - intel_uncore_forcewake_put__locked(dev_priv, FORCEWAKE_ALL); - spin_unlock(&dev_priv->uncore.lock); } static int execlists_update_context(struct drm_i915_gem_request *rq) @@ -513,9 +509,9 @@ static u32 get_context_status(struct intel_engine_cs *ring, u8 read_pointer, read_pointer %= GEN8_CSB_ENTRIES; - *context_id = I915_READ(RING_CONTEXT_STATUS_BUF_HI(ring, read_pointer)); + *context_id = I915_READ_FW(RING_CONTEXT_STATUS_BUF_HI(ring, read_pointer)); - return I915_READ(RING_CONTEXT_STATUS_BUF_LO(ring, read_pointer)); + return I915_READ_FW(RING_CONTEXT_STATUS_BUF_LO(ring, read_pointer)); } /** @@ -535,15 +531,18 @@ void intel_lrc_irq_handler(struct intel_engine_cs *ring) u32 status_id; u32 submit_contexts = 0; - status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(ring)); + spin_lock(&ring->execlist_lock); + + spin_lock(&dev_priv->uncore.lock); + intel_uncore_forcewake_get__locked(dev_priv, FORCEWAKE_ALL); + + status_pointer = I915_READ_FW(RING_CONTEXT_STATUS_PTR(ring)); read_pointer = ring->next_context_status_buffer; write_pointer = GEN8_CSB_WRITE_PTR(status_pointer); if (read_pointer > write_pointer) write_pointer += GEN8_CSB_ENTRIES; - spin_lock(&ring->execlist_lock); - while (read_pointer 
< write_pointer) { status = get_context_status(ring, ++read_pointer, &status_id); @@ -569,22 +568,26 @@ void intel_lrc_irq_handler(struct intel_engine_cs *ring) GEN8_CTX_STATUS_ACTIVE_IDLE)))) execlists_context_unqueue(ring); - spin_unlock(&ring->execlist_lock); - - if (unlikely(submit_contexts > 2)) - DRM_ERROR("More than two context complete events?\n"); - ring->next_context_status_buffer = write_pointer % GEN8_CSB_ENTRIES; /* Update the read pointer to the old write pointer. Manual ringbuffer * management ftw </sarcasm> */ - I915_WRITE(RING_CONTEXT_STATUS_PTR(ring), - _MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, - ring->next_context_status_buffer << 8)); + I915_WRITE_FW(RING_CONTEXT_STATUS_PTR(ring), + _MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, + ring->next_context_status_buffer << 8)); + + intel_uncore_forcewake_put__locked(dev_priv, FORCEWAKE_ALL); + spin_unlock(&dev_priv->uncore.lock); + + spin_unlock(&ring->execlist_lock); + + if (unlikely(submit_contexts > 2)) + DRM_ERROR("More than two context complete events?\n"); } static int execlists_context_queue(struct drm_i915_gem_request *request) { + struct drm_i915_private *dev_priv = request->i915; struct intel_engine_cs *ring = request->ring; struct drm_i915_gem_request *cursor; int num_elements = 0; @@ -616,9 +619,16 @@ static int execlists_context_queue(struct drm_i915_gem_request *request) } list_add_tail(&request->execlist_link, &ring->execlist_queue); - if (num_elements == 0) + if (num_elements == 0) { + spin_lock(&dev_priv->uncore.lock); + intel_uncore_forcewake_get__locked(dev_priv, FORCEWAKE_ALL); + execlists_context_unqueue(ring); + intel_uncore_forcewake_put__locked(dev_priv, FORCEWAKE_ALL); + spin_unlock(&dev_priv->uncore.lock); + } + spin_unlock_irq(&ring->execlist_lock); return 0; Regards, Tvrtko _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx