The engine also provides a mirror of the CSB write pointer in the HWSP, but not of our read pointer. To take advantage of this we need to remember where we read up to on the last interrupt and continue off from there. This poses a problem following a reset, as we don't know where the hw will start writing from, and due to the use of power contexts we cannot perform that query during the reset itself. So we continue the current modus operandi of delaying the first read of the context-status read/write pointers until after the first interrupt. With this we should now have eliminated all uncached mmio reads in handling the context-status interrupt, though we still have the uncached mmio writes for submitting new work, and many uncached mmio reads in the global interrupt handler itself. Still a step in the right direction towards reducing our resubmit latency, although it appears lost in the noise! v2: Cannonlake moved the CSB write index v3: Include the sw/hwsp state in debugfs/i915_engine_info v4: Also revert to using CSB mmio for GVT-g v5: Prevent the compiler reloading tail (Mika) Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Cc: Michel Thierry <michel.thierry@xxxxxxxxx> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxx> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@xxxxxxxxx> Cc: Zhenyu Wang <zhenyuw@xxxxxxxxxxxxxxx> Cc: Zhi Wang <zhi.a.wang@xxxxxxxxx> Acked-by: Michel Thierry <michel.thierry@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_debugfs.c | 6 ++++-- drivers/gpu/drm/i915/i915_drv.h | 8 ++++++++ drivers/gpu/drm/i915/intel_lrc.c | 27 ++++++++++++++++++++------- drivers/gpu/drm/i915/intel_ringbuffer.h | 3 +++ 4 files changed, 35 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 92aa7465e950..2d8de3a32acd 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3323,8 +3323,10 @@ static int i915_engine_info(struct seq_file *m, void *unused) ptr = I915_READ(RING_CONTEXT_STATUS_PTR(engine)); read = GEN8_CSB_READ_PTR(ptr); write = GEN8_CSB_WRITE_PTR(ptr); - seq_printf(m, "\tExeclist CSB read %d, write %d, interrupt posted? %s\n", - read, write, + seq_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], interrupt posted? %s\n", + read, engine->csb_head, + write, + intel_read_status_page(engine, intel_hws_csb_write_index(engine->i915)), yesno(test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))); if (read >= GEN8_CSB_ENTRIES) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 60267e375e88..77781f3e92be 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -4336,4 +4336,12 @@ intel_engine_can_store_dword(struct intel_engine_cs *engine) engine->class); } +static inline int intel_hws_csb_write_index(struct drm_i915_private *i915) +{ + if (INTEL_GEN(i915) >= 10) + return CNL_HWS_CSB_WRITE_INDEX; + else + return I915_HWS_CSB_WRITE_INDEX; +} + #endif diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index e889b57ec676..2c1017d9ed29 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -545,8 +545,6 @@ static void intel_lrc_irq_handler(unsigned long data) * new request (outside of the context-switch interrupt). */ while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) { - u32 __iomem *csb_mmio = - dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)); /* The HWSP contains a (cacheable) mirror of the CSB */ const u32 *buf = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; @@ -556,6 +554,7 @@ static void intel_lrc_irq_handler(unsigned long data) if (unlikely(engine->csb_use_mmio)) { buf = (u32 * __force) (dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0))); + engine->csb_head = -1; /* force mmio read of CSB ptrs */ } /* The write will be ordered by the uncached read (itself @@ -569,9 +568,19 @@ static void intel_lrc_irq_handler(unsigned long data) * is set and we do a new loop. */ __clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); - head = readl(csb_mmio); - tail = GEN8_CSB_WRITE_PTR(head); - head = GEN8_CSB_READ_PTR(head); + if (unlikely(engine->csb_head == -1)) { /* following a reset */ + head = readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine))); + tail = GEN8_CSB_WRITE_PTR(head); + head = GEN8_CSB_READ_PTR(head); + engine->csb_head = head; + } else { + const int write_idx = + intel_hws_csb_write_index(dev_priv) - + I915_HWS_CSB_BUF0_INDEX; + + head = engine->csb_head; + tail = READ_ONCE(buf[write_idx]); + } while (head != tail) { struct drm_i915_gem_request *rq; unsigned int status; @@ -625,8 +634,11 @@ static void intel_lrc_irq_handler(unsigned long data) !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); } - writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8), - csb_mmio); + if (head != engine->csb_head) { + engine->csb_head = head; + writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8), + dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine))); + } } if (execlists_elsp_ready(engine)) @@ -1275,6 +1287,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]), GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift); clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); + engine->csb_head = -1; /* After a GPU reset, we may have requests to replay */ submit = false; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 67b91f0b29bb..76220cd50926 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -392,6 +392,7 @@ struct intel_engine_cs { struct rb_node *execlist_first; unsigned int fw_domains; bool csb_use_mmio; + unsigned int csb_head; /* Contexts are pinned whilst they are active on the GPU. The last * context executed remains active whilst the GPU is idle - the @@ -498,6 +499,8 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT) #define I915_HWS_CSB_BUF0_INDEX 0x10 +#define I915_HWS_CSB_WRITE_INDEX 0x1f +#define CNL_HWS_CSB_WRITE_INDEX 0x2f struct intel_ring * intel_engine_create_ring(struct intel_engine_cs *engine, int size); -- 2.14.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx