Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes: > The engine also provides a mirror of the CSB write pointer in the HWSP, > but not of our read pointer. To take advantage of this we need to > remember where we read up to on the last interrupt and continue off from > there. This poses a problem following a reset, as we don't know where > the hw will start writing from, and due to the use of power contexts we > cannot perform that query during the reset itself. So we continue the > current modus operandi of delaying the first read of the context-status > read/write pointers until after the first interrupt. With this we should > now have eliminated all uncached mmio reads in handling the > context-status interrupt, though we still have the uncached mmio writes > for submitting new work, and many uncached mmio reads in the global > interrupt handler itself. Still a step in the right direction towards > reducing our resubmit latency, although it appears lost in the noise! > > v2: Cannonlake moved the CSB write index > v3: Include the sw/hwsp state in debugfs/i915_engine_info > v4: Also revert to using CSB mmio for GVT-g > v5: Prevent the compiler reloading tail (Mika) > > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Cc: Michel Thierry <michel.thierry@xxxxxxxxx> > Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> > Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxx> > Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@xxxxxxxxx> > Cc: Zhenyu Wang <zhenyuw@xxxxxxxxxxxxxxx> > Cc: Zhi Wang <zhi.a.wang@xxxxxxxxx> > Acked-by: Michel Thierry <michel.thierry@xxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_debugfs.c | 6 ++++-- > drivers/gpu/drm/i915/i915_drv.h | 8 ++++++++ > drivers/gpu/drm/i915/intel_lrc.c | 27 ++++++++++++++++++++------- > drivers/gpu/drm/i915/intel_ringbuffer.h | 3 +++ > 4 files changed, 35 insertions(+), 9 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c > index 7062cde94a49..12381045ed6a 100644 > --- 
a/drivers/gpu/drm/i915/i915_debugfs.c > +++ b/drivers/gpu/drm/i915/i915_debugfs.c > @@ -3326,8 +3326,10 @@ static int i915_engine_info(struct seq_file *m, void *unused) > ptr = I915_READ(RING_CONTEXT_STATUS_PTR(engine)); > read = GEN8_CSB_READ_PTR(ptr); > write = GEN8_CSB_WRITE_PTR(ptr); > - seq_printf(m, "\tExeclist CSB read %d, write %d, interrupt posted? %s\n", > - read, write, > + seq_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], interrupt posted? %s\n", > + read, engine->csb_head, > + write, > + intel_read_status_page(engine, intel_hws_csb_write_index(engine->i915)), > yesno(test_bit(ENGINE_IRQ_EXECLIST, > &engine->irq_posted))); > if (read >= GEN8_CSB_ENTRIES) > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 1cc31a5b049f..78195b7c64db 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -4399,4 +4399,12 @@ int remap_io_mapping(struct vm_area_struct *vma, > unsigned long addr, unsigned long pfn, unsigned long size, > struct io_mapping *iomap); > > +static inline int intel_hws_csb_write_index(struct drm_i915_private *i915) > +{ > + if (INTEL_GEN(i915) >= 10) > + return CNL_HWS_CSB_WRITE_INDEX; > + else > + return I915_HWS_CSB_WRITE_INDEX; > +} > + > #endif > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > index 8e4b21a18554..55d7eee21226 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -545,8 +545,6 @@ static void intel_lrc_irq_handler(unsigned long data) > * new request (outside of the context-switch interrupt). 
> */ > while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) { > - u32 __iomem *csb_mmio = > - dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)); > /* The HWSP contains a (cacheable) mirror of the CSB */ > const u32 *buf = > &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; > @@ -556,6 +554,7 @@ static void intel_lrc_irq_handler(unsigned long data) > if (unlikely(engine->csb_use_mmio)) { > buf = (u32 * __force) > (dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0))); > + engine->csb_head = -1; /* force mmio read of CSB ptrs */ > } > > /* The write will be ordered by the uncached read (itself > @@ -569,9 +568,19 @@ static void intel_lrc_irq_handler(unsigned long data) > * is set and we do a new loop. > */ > __clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); > - head = readl(csb_mmio); > - tail = GEN8_CSB_WRITE_PTR(head); > - head = GEN8_CSB_READ_PTR(head); > + if (unlikely(engine->csb_head == -1)) { /* following a reset */ I was going to suggest reusing the csb_use_mmio flag for this, but that would not gain much given the read pointer write further down.
Reviewed-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxx> > + head = readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine))); > + tail = GEN8_CSB_WRITE_PTR(head); > + head = GEN8_CSB_READ_PTR(head); > + engine->csb_head = head; > + } else { > + const int write_idx = > + intel_hws_csb_write_index(dev_priv) - > + I915_HWS_CSB_BUF0_INDEX; > + > + head = engine->csb_head; > + tail = READ_ONCE(buf[write_idx]); > + } > while (head != tail) { > struct drm_i915_gem_request *rq; > unsigned int status; > @@ -625,8 +634,11 @@ static void intel_lrc_irq_handler(unsigned long data) > !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); > } > > - writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8), > - csb_mmio); > + if (head != engine->csb_head) { > + engine->csb_head = head; > + writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8), > + dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine))); > + } > } > > if (execlists_elsp_ready(engine)) > @@ -1275,6 +1287,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) > I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]), > GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift); > clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); > + engine->csb_head = -1; > > /* After a GPU reset, we may have requests to replay */ > submit = false; > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h > index 5c055b62966d..4d63a2c0b2e1 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h > @@ -392,6 +392,7 @@ struct intel_engine_cs { > struct rb_node *execlist_first; > unsigned int fw_domains; > bool csb_use_mmio; > + unsigned int csb_head; > > /* Contexts are pinned whilst they are active on the GPU. 
The last > * context executed remains active whilst the GPU is idle - the > @@ -498,6 +499,8 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) > #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT) > > #define I915_HWS_CSB_BUF0_INDEX 0x10 > +#define I915_HWS_CSB_WRITE_INDEX 0x1f > +#define CNL_HWS_CSB_WRITE_INDEX 0x2f > > struct intel_ring * > intel_engine_create_ring(struct intel_engine_cs *engine, int size); > -- > 2.14.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx