This is based on a workaround implemented in the Windows driver. I've tried a similar fix for Sandybridge with no luck, but it is a bit different for IVB. This is pretty experimental at this point; hopefully it helps anyone having any missed IRQ issues ;) Cc: Jesse Barnes <jesse.barnes at intel.com> Cc: Michael Larabel <Michael at phoronix.com> Signed-off-by: Ben Widawsky <benjamin.widawsky at intel.com> --- drivers/gpu/drm/i915/i915_reg.h | 2 ++ drivers/gpu/drm/i915/intel_ringbuffer.c | 47 +++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index cb55444..e925324 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -210,6 +210,7 @@ #define MI_MEM_VIRTUAL (1 << 22) /* 965+ only */ #define MI_STORE_DWORD_INDEX MI_INSTR(0x21, 1) #define MI_STORE_DWORD_INDEX_SHIFT 2 +#define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1) /* Official intel docs are somewhat sloppy concerning MI_LOAD_REGISTER_IMM: * - Always issue a MI_NOOP _before_ the MI_LOAD_REGISTER_IMM - otherwise hw * simply ignores the register load under certain conditions. @@ -217,6 +218,7 @@ * address/value pairs. Don't overdue it, though, x <= 2^4 must hold! 
*/ #define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*x-1) +#define MI_LOAD_REGISTER_MEM MI_INSTR(0x29, 1) #define MI_FLUSH_DW MI_INSTR(0x26, 1) /* for GEN6 */ #define MI_INVALIDATE_TLB (1<<18) #define MI_INVALIDATE_BSD (1<<7) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 465a7da..40976d6 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -464,6 +464,50 @@ gen6_add_request(struct intel_ring_buffer *ring, return 0; } +static int +ivb_render_add_request(struct intel_ring_buffer *ring, + u32 *seqno) +{ + struct pipe_control *pc = ring->private; + u32 scratch_addr = pc->gtt_offset + 1024; + u32 mbox1_reg; + u32 mbox2_reg; + int ret; + + ret = intel_ring_begin(ring, 16); + if (ret) + return ret; + + mbox1_reg = ring->signal_mbox[0]; + mbox2_reg = ring->signal_mbox[1]; + + *seqno = i915_gem_next_request_seqno(ring); + + update_mboxes(ring, *seqno, mbox1_reg); + update_mboxes(ring, *seqno, mbox2_reg); + intel_ring_emit(ring, MI_STORE_DWORD_INDEX); + intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); + intel_ring_emit(ring, *seqno); + + /* Experimental workaround. Doing a load/store of the same reg should + * cause the HW to figure out the hazard and stall the pipeline. + * The choice of CCID is just a random reg of < 40000 that doesn't have + * much impact. scratch_addr is also random at this point. 
+ */ + intel_ring_emit(ring, MI_STORE_REGISTER_MEM); + intel_ring_emit(ring, CCID); + intel_ring_emit(ring, scratch_addr); + intel_ring_emit(ring, MI_LOAD_REGISTER_MEM); + intel_ring_emit(ring, CCID); + intel_ring_emit(ring, scratch_addr); + + + intel_ring_emit(ring, MI_USER_INTERRUPT); + intel_ring_advance(ring); + + return 0; +} + /** * intel_ring_sync - sync the waiter to the signaller on seqno * @@ -1459,6 +1503,9 @@ int intel_init_render_ring_buffer(struct drm_device *dev) ring->get_seqno = pc_render_get_seqno; } + if (INTEL_INFO(dev)->gen == 7) + ring->add_request = ivb_render_add_request; + if (!I915_NEED_GFX_HWS(dev)) { ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr; memset(ring->status_page.page_addr, 0, PAGE_SIZE); -- 1.7.10