Quoting Tvrtko Ursulin (2018-06-28 11:02:17) > > On 27/06/2018 22:07, Chris Wilson wrote: > > Following the removal of the last workarounds, the only CSB mmio access > > is for the old vGPU interface. The mmio registers presented by vGPU do > > not require forcewake and can be treated as ordinary volatile memory, > > i.e. they behave just like the HWSP access just at a different location. > > We can reduce the CSB access to a set of read/write/buffer pointers and > > treat the various paths identically and not worry about forcewake. > > (Forcewake is nightmare for worstcase latency, and we want to process > > this all with irqsoff -- no latency allowed!) > > > > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > > --- > > drivers/gpu/drm/i915/intel_engine_cs.c | 12 --- > > drivers/gpu/drm/i915/intel_lrc.c | 116 ++++++++++-------------- > > drivers/gpu/drm/i915/intel_ringbuffer.h | 23 +++-- > > 3 files changed, 65 insertions(+), 86 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c > > index d3264bd6e9dc..7209c22798e6 100644 > > --- a/drivers/gpu/drm/i915/intel_engine_cs.c > > +++ b/drivers/gpu/drm/i915/intel_engine_cs.c > > @@ -25,7 +25,6 @@ > > #include <drm/drm_print.h> > > > > #include "i915_drv.h" > > -#include "i915_vgpu.h" > > #include "intel_ringbuffer.h" > > #include "intel_lrc.h" > > > > @@ -456,21 +455,10 @@ static void intel_engine_init_batch_pool(struct intel_engine_cs *engine) > > i915_gem_batch_pool_init(&engine->batch_pool, engine); > > } > > > > -static bool csb_force_mmio(struct drm_i915_private *i915) > > -{ > > - /* Older GVT emulation depends upon intercepting CSB mmio */ > > - if (intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915)) > > - return true; > > - > > - return false; > > -} > > - > > static void intel_engine_init_execlist(struct intel_engine_cs *engine) > > { > > struct intel_engine_execlists * const execlists = &engine->execlists; > > > > - execlists->csb_use_mmio = csb_force_mmio(engine->i915); > > - > > execlists->port_mask = 1; > > BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists)); > > GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS); > > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > > index 91656eb2f2db..368a8c74d11d 100644 > > --- a/drivers/gpu/drm/i915/intel_lrc.c > > +++ b/drivers/gpu/drm/i915/intel_lrc.c > > @@ -137,6 +137,7 @@ > > #include <drm/i915_drm.h> > > #include "i915_drv.h" > > #include "i915_gem_render_state.h" > > +#include "i915_vgpu.h" > > #include "intel_lrc_reg.h" > > #include "intel_mocs.h" > > #include "intel_workarounds.h" > > @@ -953,44 +954,23 @@ static void process_csb(struct intel_engine_cs *engine) > > { > > struct intel_engine_execlists * const execlists = &engine->execlists; > > struct execlist_port *port = execlists->port; > > - struct drm_i915_private *i915 = engine->i915; > > - > > - /* The HWSP contains a (cacheable) mirror of the CSB */ > > - const u32 *buf = > > - &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; > > - unsigned int head, tail; > > - bool fw = false; > > + const u32 * const buf = execlists->csb_status; > > + u8 head, tail; > > > > /* Clear before reading to catch new interrupts */ > > clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); > > smp_mb__after_atomic(); > > > > - if (unlikely(execlists->csb_use_mmio)) { > > - intel_uncore_forcewake_get(i915, execlists->fw_domains); > > - fw = true; > > - > > - buf = (u32 * __force) > > - (i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0))); > > + /* Note that csb_write, csb_status may be either in HWSP or mmio */ > > + head = execlists->csb_head; > > + tail = READ_ONCE(*execlists->csb_write); > > Under GVT when this is emulated mmio I think you need to mask and shift > it with GEN8_CSB_WRITE_PTR. Shift is 0, mask is applied by u8. > > + GEM_TRACE("%s cs-irq head=%d, tail=%d\n", engine->name, head, tail); > > + if (unlikely(head == tail)) > > + return; > > > > - head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine))); > > - tail = GEN8_CSB_WRITE_PTR(head); > > - head = GEN8_CSB_READ_PTR(head); > > - execlists->csb_head = head; > > - } else { > > - const int write_idx = > > - intel_hws_csb_write_index(i915) - > > - I915_HWS_CSB_BUF0_INDEX; > > + rmb(); /* Hopefully paired with a wmb() in HW */ > > > > - head = execlists->csb_head; > > - tail = READ_ONCE(buf[write_idx]); > > - rmb(); /* Hopefully paired with a wmb() in HW */ > > - } > > - GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n", > > - engine->name, > > - head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?", > > - tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?"); > > - > > - while (head != tail) { > > + do { > > Why convert while to do-while? Early unlikely return above handles the > void process_csb calls? Would the same effect happen if you put unlikely > in the while (head != tail) condition and would be simpler? The earlier return to circumvent the lfence. > > struct i915_request *rq; > > unsigned int status; > > unsigned int count; > > @@ -1016,12 +996,12 @@ static void process_csb(struct intel_engine_cs *engine) > > * status notifier. > > */ > > > > - status = READ_ONCE(buf[2 * head]); /* maybe mmio! */ > > GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n", > > engine->name, head, > > - status, buf[2*head + 1], > > + buf[2 * head + 0], buf[2 * head + 1], > > execlists->active); > > > > + status = buf[2 * head]; > > Why not leave before GEM_TRACE above, as is, and then diff is smaller? You made me correct the GEM_TRACE! Looking at it I still prefer 2 buf[] vs the mixed local/buf[]. > > if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE | > > GEN8_CTX_STATUS_PREEMPTED)) > > execlists_set_active(execlists, > > @@ -1103,16 +1083,11 @@ static void process_csb(struct intel_engine_cs *engine) > > } else { > > port_set(port, port_pack(rq, count)); > > } > > - } > > + } while (head != tail); > > > > - if (head != execlists->csb_head) { > > - execlists->csb_head = head; > > - writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8), > > - i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine))); > > - } > > - > > - if (unlikely(fw)) > > - intel_uncore_forcewake_put(i915, execlists->fw_domains); > > + writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8), > > + execlists->csb_read); > > + execlists->csb_head = head; > > I guess early return helps to avoid this, but since it is going away in > a following patch maybe it is not worth it. lfence! -Chris _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx