I quickly cobbled together a hsw version of this and gave it a whirl on one machine. Seems to work just fine here, and no lockups when switching between hw and sw binding tables. Did you get the lockups on hsw even with rendercopy? Here's my hsw version: >From 17eeb8021815e2c18d6ba9b2185a37904296c2d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@xxxxxxxxxxxxxxx> Date: Wed, 7 May 2014 12:33:01 +0300 Subject: [PATCH] rendercopy: use resource streamer on hsw MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx> --- lib/gen7_render.h | 16 +++++++- lib/rendercopy_gen7.c | 103 +++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 115 insertions(+), 4 deletions(-) diff --git a/lib/gen7_render.h b/lib/gen7_render.h index 1661d4c..58a88ef 100644 --- a/lib/gen7_render.h +++ b/lib/gen7_render.h @@ -155,8 +155,11 @@ #define GEN7_PIPE_CONTROL_IS_FLUSH (1 << 11) #define GEN7_PIPE_CONTROL_TC_FLUSH (1 << 10) #define GEN7_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8) -#define GEN7_PIPE_CONTROL_GLOBAL_GTT (1 << 2) -#define GEN7_PIPE_CONTROL_LOCAL_PGTT (0 << 2) +#define GEN7_PIPE_CONTROL_FLUSH (1 << 7) +#define GEN7_PIPE_CONTROL_DC_FLUSH (1 << 5) +#define GEN7_PIPE_CONTROL_VF_INVALIDATE (1 << 4) +#define GEN7_PIPE_CONTROL_CC_INVALIDATE (1 << 3) +#define GEN7_PIPE_CONTROL_SC_INVALIDATE (1 << 2) #define GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1) #define GEN7_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0) @@ -1361,4 +1364,13 @@ typedef enum { EXTEND_COUNT } sampler_extend_t; +/* HSW+ resource streamer */ +#define HSW_3DSTATE_BINDING_TABLE_POOL_ALLOC GEN7_3D(3, 1, 0x19) +# define BINDING_TABLE_POOL_ENABLE (1 << 11) +#define HSW_3DSTATE_BINDING_TABLE_EDIT_VS GEN7_3D(3, 0, 0x43) +#define HSW_3DSTATE_BINDING_TABLE_EDIT_GS GEN7_3D(3, 0, 0x44) +#define HSW_3DSTATE_BINDING_TABLE_EDIT_HS GEN7_3D(3, 0, 0x45) +#define HSW_3DSTATE_BINDING_TABLE_EDIT_DS GEN7_3D(3, 
0, 0x46) +#define HSW_3DSTATE_BINDING_TABLE_EDIT_PS GEN7_3D(3, 0, 0x47) + #endif diff --git a/lib/rendercopy_gen7.c b/lib/rendercopy_gen7.c index 5131d8f..4efccb9 100644 --- a/lib/rendercopy_gen7.c +++ b/lib/rendercopy_gen7.c @@ -21,6 +21,9 @@ #include "gen7_render.h" #include "intel_reg.h" +#ifndef I915_EXEC_RESOURCE_STREAMER +#define I915_EXEC_RESOURCE_STREAMER (1<<13) +#endif static const uint32_t ps_kernel[][4] = { { 0x0080005a, 0x2e2077bd, 0x000000c0, 0x008d0040 }, @@ -73,11 +76,14 @@ gen7_render_flush(struct intel_batchbuffer *batch, drm_intel_context *context, uint32_t batch_end) { int ret; + uint32_t flags = I915_EXEC_RENDER; ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer); + if (batch->use_resource_streamer) + flags |= I915_EXEC_RESOURCE_STREAMER; if (ret == 0) ret = drm_intel_gem_bo_context_exec(batch->bo, context, - batch_end, 0); + batch_end, flags); assert(ret == 0); } @@ -219,6 +225,75 @@ static void gen7_emit_vertex_buffer(struct intel_batchbuffer *batch, OUT_BATCH(0); } +static void +gen7_hw_binding_table(struct intel_batchbuffer *batch, bool enable) +{ + if (!enable) { + OUT_BATCH(MI_RS_CONTROL | 0x0); + + OUT_BATCH(HSW_3DSTATE_BINDING_TABLE_POOL_ALLOC | (3 - 2)); + /* binding table pool base address */ + OUT_BATCH(3 << 5); + /* Upper bound */ + OUT_BATCH(0); + + OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(GEN7_PIPE_CONTROL_CS_STALL | GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD); + OUT_BATCH(0); + OUT_BATCH(0); + + OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(GEN7_PIPE_CONTROL_SC_INVALIDATE); + OUT_BATCH(0); + OUT_BATCH(0); + + return; + } + OUT_BATCH(HSW_3DSTATE_BINDING_TABLE_POOL_ALLOC | (3 - 2)); + + /* binding table pool base address */ + OUT_RELOC(batch->hw_bt_pool_bo, I915_GEM_DOMAIN_SAMPLER, 0, + BINDING_TABLE_POOL_ENABLE | (3 << 5)); + + /* Upper bound */ + OUT_RELOC(batch->hw_bt_pool_bo, I915_GEM_DOMAIN_SAMPLER, 0, + batch->hw_bt_pool_bo->size); + + OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2)); + 
OUT_BATCH(GEN7_PIPE_CONTROL_CS_STALL | GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD); + OUT_BATCH(0); + OUT_BATCH(0); + + OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(GEN7_PIPE_CONTROL_SC_INVALIDATE); + OUT_BATCH(0); + OUT_BATCH(0); +} + +static uint32_t +gen7_rs_bind_surfaces(struct intel_batchbuffer *batch, + struct igt_buf *src, + struct igt_buf *dst, + uint32_t *surf0, uint32_t *surf1) +{ + *surf0 = gen7_bind_buf(batch, dst, GEN7_SURFACEFORMAT_B8G8R8A8_UNORM, 1); + *surf1 = gen7_bind_buf(batch, src, GEN7_SURFACEFORMAT_B8G8R8A8_UNORM, 0); + + return 0; +} + +static void +gen7_rs_edit_surfaces(struct intel_batchbuffer *batch, + uint32_t surf0, uint32_t surf1) +{ + OUT_BATCH(HSW_3DSTATE_BINDING_TABLE_EDIT_PS | (4 - 2)); + OUT_BATCH(0x3); + { + OUT_BATCH(0 << 16 | surf0 >> 5); + OUT_BATCH(1 << 16 | surf1 >> 5); + } +} + static uint32_t gen7_bind_surfaces(struct intel_batchbuffer *batch, struct igt_buf *src, @@ -241,8 +316,19 @@ gen7_emit_binding_table(struct intel_batchbuffer *batch, struct igt_buf *src, struct igt_buf *dst) { + uint32_t surf0 = 0, surf1 = 1; + uint32_t binding_table; + + if (batch->use_resource_streamer) { + binding_table = gen7_rs_bind_surfaces(batch, src, dst, + &surf0, &surf1); + gen7_rs_edit_surfaces(batch, surf0, surf1); + } else { + binding_table = gen7_bind_surfaces(batch, src, dst); + } + OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2)); - OUT_BATCH(gen7_bind_surfaces(batch, src, dst)); + OUT_BATCH(binding_table); } static void @@ -273,6 +359,9 @@ gen7_create_blend_state(struct intel_batchbuffer *batch) static void gen7_emit_state_base_address(struct intel_batchbuffer *batch) { + if (batch->use_resource_streamer) + OUT_BATCH(MI_RS_CONTROL | 0x0); + OUT_BATCH(GEN7_STATE_BASE_ADDRESS | (10 - 2)); OUT_BATCH(0); OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); @@ -284,6 +373,9 @@ gen7_emit_state_base_address(struct intel_batchbuffer *batch) OUT_BATCH(0 | BASE_ADDRESS_MODIFY); OUT_BATCH(0); OUT_BATCH(0 | 
BASE_ADDRESS_MODIFY); + + if (batch->use_resource_streamer) + OUT_BATCH(MI_RS_CONTROL | 0x1); } static uint32_t @@ -545,6 +637,10 @@ void gen7_render_copyfunc(struct intel_batchbuffer *batch, OUT_BATCH(GEN7_PIPELINE_SELECT | PIPELINE_SELECT_3D); gen7_emit_state_base_address(batch); + + if (batch->use_resource_streamer) + gen7_hw_binding_table(batch, true); + gen7_emit_multisample(batch); gen7_emit_urb(batch); gen7_emit_vs(batch); @@ -576,6 +672,9 @@ void gen7_render_copyfunc(struct intel_batchbuffer *batch, OUT_BATCH(0); /* start instance location */ OUT_BATCH(0); /* index buffer offset, ignored */ + if (batch->use_resource_streamer) + gen7_hw_binding_table(batch, false); + OUT_BATCH(MI_BATCH_BUFFER_END); batch_end = batch->ptr - batch->buffer; -- 1.8.3.2 -- Ville Syrjälä Intel OTC _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx