This batchbuffer is over 4096 bytes, so we need to increase the size of the array (and the KMD has to be modified to deal with more than one page). Notice that there are two workarounds embedded here, both applicable to all CNL steppings. v2: WaPSRandomCSNotDone is not A0 only (as per the latest BSpec), so update the comment in the code and in the commit message. Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxx> Cc: Ben Widawsky <ben@xxxxxxxxxxxx> Signed-off-by: Oscar Mateo <oscar.mateo@xxxxxxxxx> --- lib/gen10_render.h | 63 +++ tools/null_state_gen/Makefile.am | 3 +- tools/null_state_gen/intel_batchbuffer.h | 2 +- tools/null_state_gen/intel_null_state_gen.c | 5 +- tools/null_state_gen/intel_renderstate.h | 1 + tools/null_state_gen/intel_renderstate_gen10.c | 538 +++++++++++++++++++++++++ 6 files changed, 609 insertions(+), 3 deletions(-) create mode 100644 lib/gen10_render.h create mode 100644 tools/null_state_gen/intel_renderstate_gen10.c diff --git a/lib/gen10_render.h b/lib/gen10_render.h new file mode 100644 index 0000000..f4a7dff --- /dev/null +++ b/lib/gen10_render.h @@ -0,0 +1,63 @@ +#ifndef GEN10_RENDER_H +#define GEN10_RENDER_H + +#include "gen9_render.h" + +#define GEN7_MI_RS_CONTROL (0x6 << 23) +# define GEN7_MI_RS_CONTROL_ENABLE (1 << 0) + +#define GEN10_3DSTATE_GATHER_POOL_ALLOC GEN6_3D(3, 1, 0x1a) +# define GEN10_3DSTATE_GATHER_POOL_ENABLE (1 << 11) + +#define GEN10_3DSTATE_GATHER_CONSTANT_VS GEN6_3D(3, 0, 0x34) +#define GEN10_3DSTATE_GATHER_CONSTANT_HS GEN6_3D(3, 0, 0x36) +#define GEN10_3DSTATE_GATHER_CONSTANT_DS GEN6_3D(3, 0, 0x37) +#define GEN10_3DSTATE_GATHER_CONSTANT_GS GEN6_3D(3, 0, 0x35) +#define GEN10_3DSTATE_GATHER_CONSTANT_PS GEN6_3D(3, 0, 0x38) + +#define GEN10_3DSTATE_WM_DEPTH_STENCIL GEN6_3D(3, 0, 0x4e) +#define GEN10_3DSTATE_WM_CHROMAKEY GEN6_3D(3, 0, 0x4c) + +#define GEN8_REG_L3_CACHE_CONFIG 0x7034 + +/* + * Programming for L3 cache allocations can be made per bank. 
Based on the + * programmed value HW will apply same allocations on other available banks. + * Total L3 Cache size per bank = 256 KB. + * {SLM, URB, DC, RO(I/S, C, T), L3 Client Pool} + * { 0, 96, 32, 128, 0 } + */ +#define GEN10_L3_CACHE_CONFIG_VALUE 0x00420060 + +#define URB_ALIGN(val, align) ((val % align) ? (val - (val % align)) : val) + +#define GEN10_VS_MIN_NUM_OF_URB_ENTRIES 64 +#define GEN10_VS_MAX_NUM_OF_URB_ENTRIES 2752 + +#define GEN10_KB_PER_URB_INDEX 8 +#define GEN10_L3_URB_SIZE_PER_BANK_IN_KB 96 + +#define GEN10_URB_RESERVED_SIZE_KB 32 +#define GEN10_URB_RESERVED_END_SIZE_KB 8 + +#define GEN10_VS_NUM_BITS_PER_URB_UNIT 512 +#define GEN10_VS_NUM_OF_URB_UNITS 1 // zero based +#define GEN10_VS_URB_ENTRY_SIZE_IN_BITS (GEN10_VS_NUM_BITS_PER_URB_UNIT * \ + (GEN10_VS_NUM_OF_URB_UNITS + 1)) + +#define GEN10_VS_URB_START_INDEX (GEN10_URB_RESERVED_SIZE_KB / GEN10_KB_PER_URB_INDEX) + +#define GEN10_URB_SIZE_PER_SLICE_KB(l3_bank_count, slice_count) \ + URB_ALIGN((uint32_t)(GEN10_L3_URB_SIZE_PER_BANK_IN_KB * l3_bank_count / slice_count), GEN10_KB_PER_URB_INDEX) + +#define GEN10_VS_URB_SIZE_PER_SLICE_KB(total_urb_size_per_slice) \ + (total_urb_size_per_slice - GEN10_URB_RESERVED_SIZE_KB - GEN10_URB_RESERVED_END_SIZE_KB) + +#define GEN10_VS_NUM_URB_ENTRIES_PER_SLICE(total_urb_size_per_slice) \ + ((GEN10_VS_URB_SIZE_PER_SLICE_KB(total_urb_size_per_slice) * \ + 1024 * 8) / GEN10_VS_URB_ENTRY_SIZE_IN_BITS) + +#define GEN10_VS_END_URB_INDEX(urb_size_per_slice) \ + ((urb_size_per_slice - GEN10_URB_RESERVED_END_SIZE_KB) / GEN10_KB_PER_URB_INDEX) + +#endif diff --git a/tools/null_state_gen/Makefile.am b/tools/null_state_gen/Makefile.am index 24884a7..2f90990 100644 --- a/tools/null_state_gen/Makefile.am +++ b/tools/null_state_gen/Makefile.am @@ -12,9 +12,10 @@ intel_null_state_gen_SOURCES = \ intel_renderstate_gen7.c \ intel_renderstate_gen8.c \ intel_renderstate_gen9.c \ + intel_renderstate_gen10.c \ intel_null_state_gen.c -gens := 6 7 8 9 +gens := 6 7 8 9 10 h = 
/tmp/intel_renderstate_gen$$gen.c states: intel_null_state_gen diff --git a/tools/null_state_gen/intel_batchbuffer.h b/tools/null_state_gen/intel_batchbuffer.h index 771d1c8..e40e01b 100644 --- a/tools/null_state_gen/intel_batchbuffer.h +++ b/tools/null_state_gen/intel_batchbuffer.h @@ -34,7 +34,7 @@ #include <stdint.h> #define MAX_RELOCS 64 -#define MAX_ITEMS 1024 +#define MAX_ITEMS 2048 #define MAX_STRLEN 256 #define ALIGN(x, y) (((x) + (y)-1) & ~((y)-1)) diff --git a/tools/null_state_gen/intel_null_state_gen.c b/tools/null_state_gen/intel_null_state_gen.c index 06eb954..4f12f5f 100644 --- a/tools/null_state_gen/intel_null_state_gen.c +++ b/tools/null_state_gen/intel_null_state_gen.c @@ -41,7 +41,7 @@ static int debug = 0; static void print_usage(char *s) { fprintf(stderr, "%s: <gen>\n" - " gen: gen to generate for (6,7,8,9)\n", + " gen: gen to generate for (6,7,8,9,10)\n", s); } @@ -173,6 +173,9 @@ static int do_generate(int gen) case 9: null_state_gen = gen9_setup_null_render_state; break; + case 10: + null_state_gen = gen10_setup_null_render_state; + break; } if (null_state_gen == NULL) { diff --git a/tools/null_state_gen/intel_renderstate.h b/tools/null_state_gen/intel_renderstate.h index b27b434..b3c8c2b 100644 --- a/tools/null_state_gen/intel_renderstate.h +++ b/tools/null_state_gen/intel_renderstate.h @@ -30,5 +30,6 @@ void gen6_setup_null_render_state(struct intel_batchbuffer *batch); void gen7_setup_null_render_state(struct intel_batchbuffer *batch); void gen8_setup_null_render_state(struct intel_batchbuffer *batch); void gen9_setup_null_render_state(struct intel_batchbuffer *batch); +void gen10_setup_null_render_state(struct intel_batchbuffer *batch); #endif /* __INTEL_RENDERSTATE_H__ */ diff --git a/tools/null_state_gen/intel_renderstate_gen10.c b/tools/null_state_gen/intel_renderstate_gen10.c new file mode 100644 index 0000000..f5678c3 --- /dev/null +++ b/tools/null_state_gen/intel_renderstate_gen10.c @@ -0,0 +1,538 @@ +/* + * Copyright © 2014 Intel 
Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Oscar Mateo <oscar.mateo@xxxxxxxxx> + */ + +#include "intel_renderstate.h" +#include <lib/gen10_render.h> +#include <lib/intel_reg.h> + +static void gen8_emit_wm(struct intel_batchbuffer *batch) +{ + OUT_BATCH(GEN6_3DSTATE_WM | (2 - 2)); + OUT_BATCH(GEN7_WM_LEGACY_DIAMOND_LINE_RASTERIZATION); +} + +static void gen8_emit_ps(struct intel_batchbuffer *batch) +{ + OUT_BATCH(GEN7_3DSTATE_PS | (12 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); /* kernel hi */ + OUT_BATCH(GEN7_PS_SPF_MODE); + OUT_BATCH(0); /* scratch space stuff */ + OUT_BATCH(0); /* scratch hi */ + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); // kernel 1 + OUT_BATCH(0); /* kernel 1 hi */ + OUT_BATCH(0); // kernel 2 + OUT_BATCH(0); /* kernel 2 hi */ +} + +static void gen8_emit_sf(struct intel_batchbuffer *batch) +{ + OUT_BATCH(GEN6_3DSTATE_SF | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(1 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT | + 1 << GEN6_3DSTATE_SF_VERTEX_SUB_PIXEL_PRECISION_SHIFT | + GEN7_SF_POINT_WIDTH_FROM_SOURCE | + 8); +} + +static void gen8_emit_vs(struct intel_batchbuffer *batch) +{ + OUT_BATCH(GEN6_3DSTATE_VS | (9 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(GEN7_VS_FLOATING_POINT_MODE_ALTERNATE); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); +} + +static void gen8_emit_hs(struct intel_batchbuffer *batch) +{ + OUT_BATCH(GEN7_3DSTATE_HS | (9 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT); + OUT_BATCH(0); +} + +static void gen8_emit_raster(struct intel_batchbuffer *batch) +{ + OUT_BATCH(GEN8_3DSTATE_RASTER | (5 - 2)); + OUT_BATCH(GEN8_RASTER_CULL_NONE | GEN8_RASTER_FRONT_WINDING_CCW); + OUT_BATCH(0.0); + OUT_BATCH(0.0); + OUT_BATCH(0.0); +} + +static void gen10_emit_urb(struct intel_batchbuffer *batch) +{ + /* Smallest SKU: 3x8*/ + int l3_bank_count = 3; + int slice_count = 1; + int urb_size_per_slice = 
GEN10_URB_SIZE_PER_SLICE_KB(l3_bank_count, slice_count); + int other_urb_start_addr = GEN10_VS_END_URB_INDEX(urb_size_per_slice); + const int vs_urb_start_addr = GEN10_VS_URB_START_INDEX; + const int vs_urb_alloc_size = GEN10_VS_NUM_OF_URB_UNITS; + int vs_urb_entries = GEN10_VS_NUM_URB_ENTRIES_PER_SLICE(urb_size_per_slice); + + if (vs_urb_entries < GEN10_VS_MIN_NUM_OF_URB_ENTRIES) + vs_urb_entries = GEN10_VS_MIN_NUM_OF_URB_ENTRIES; + if (vs_urb_entries > GEN10_VS_MAX_NUM_OF_URB_ENTRIES) + vs_urb_entries = GEN10_VS_MAX_NUM_OF_URB_ENTRIES; + + OUT_BATCH(GEN7_3DSTATE_URB_VS); + OUT_BATCH(vs_urb_entries | + (vs_urb_alloc_size << 16) | + (vs_urb_start_addr << 25)); + + OUT_BATCH(GEN7_3DSTATE_URB_HS); + OUT_BATCH(other_urb_start_addr << 25); + + OUT_BATCH(GEN7_3DSTATE_URB_DS); + OUT_BATCH(other_urb_start_addr << 25); + + OUT_BATCH(GEN7_3DSTATE_URB_GS); + OUT_BATCH(other_urb_start_addr << 25); +} + +static void gen8_emit_vf_topology(struct intel_batchbuffer *batch) +{ + OUT_BATCH(GEN8_3DSTATE_VF_TOPOLOGY); + OUT_BATCH(_3DPRIM_TRILIST); +} + +static void gen8_emit_so_decl_list(struct intel_batchbuffer *batch) +{ + const int num_decls = 128; + int i; + + OUT_BATCH(GEN8_3DSTATE_SO_DECL_LIST | + (((2 * num_decls) + 3) - 2) /* DWORD count - 2 */); + OUT_BATCH(0); + OUT_BATCH(num_decls); + + for (i = 0; i < num_decls; i++) { + OUT_BATCH(0); + OUT_BATCH(0); + } +} + +static void gen8_emit_so_buffer(struct intel_batchbuffer *batch, const int index) +{ + OUT_BATCH(GEN8_3DSTATE_SO_BUFFER | (8 - 2)); + OUT_BATCH(index << 29); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); +} + +static void gen8_emit_chroma_key(struct intel_batchbuffer *batch, const int index) +{ + OUT_BATCH(GEN6_3DSTATE_CHROMA_KEY | (4 - 2)); + OUT_BATCH(index << 30); + OUT_BATCH(0); + OUT_BATCH(0); +} + +static void gen8_emit_vertex_buffers(struct intel_batchbuffer *batch) +{ + const int buffers = 33; + int i; + + OUT_BATCH(GEN6_3DSTATE_VERTEX_BUFFERS | + (((4 * 
buffers) + 1)- 2) /* DWORD count - 2 */); + + for (i = 0; i < buffers; i++) { + OUT_BATCH(i << VB0_BUFFER_INDEX_SHIFT | + GEN7_VB0_BUFFER_ADDR_MOD_EN); + OUT_BATCH(0); /* Address */ + OUT_BATCH(0); + OUT_BATCH(0); + } +} + +static void gen8_emit_vertex_elements(struct intel_batchbuffer *batch) +{ + const int elements = 34; + int i; + + OUT_BATCH(GEN6_3DSTATE_VERTEX_ELEMENTS | + (((2 * elements) + 1) - 2) /* DWORD count - 2 */); + + /* Element 0 */ + OUT_BATCH(VE0_VALID); + OUT_BATCH( + GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT | + GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT | + GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT | + GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT); + /* Elements 1 -> 33 */ + for (i = 1; i < elements; i++) { + OUT_BATCH(0); + OUT_BATCH(0); + } +} + +static void gen8_emit_cc_state_pointers(struct intel_batchbuffer *batch) +{ + union { + float fval; + uint32_t uval; + } u; + + unsigned offset; + + u.fval = 1.0f; + + offset = intel_batch_state_offset(batch, 64); + OUT_STATE(0); + OUT_STATE(0); /* Alpha reference value */ + OUT_STATE(u.uval); /* Blend constant color RED */ + OUT_STATE(u.uval); /* Blend constant color BLUE */ + OUT_STATE(u.uval); /* Blend constant color GREEN */ + OUT_STATE(u.uval); /* Blend constant color ALPHA */ + + OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS); + OUT_BATCH_STATE_OFFSET(offset | 1); +} + +static void gen8_emit_blend_state_pointers(struct intel_batchbuffer *batch) +{ + unsigned offset; + int i; + + offset = intel_batch_state_offset(batch, 64); + + for (i = 0; i < 17; i++) + OUT_STATE(0); + + OUT_BATCH(GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2)); + OUT_BATCH_STATE_OFFSET(offset | 1); +} + +static void gen8_emit_ps_extra(struct intel_batchbuffer *batch) +{ + OUT_BATCH(GEN8_3DSTATE_PS_EXTRA | (2 - 2)); + OUT_BATCH(GEN8_PSX_PIXEL_SHADER_VALID | + GEN8_PSX_ATTRIBUTE_ENABLE); + +} + +static void gen8_emit_ps_blend(struct intel_batchbuffer *batch) +{ + OUT_BATCH(GEN8_3DSTATE_PS_BLEND | (2 - 
2)); + OUT_BATCH(GEN8_PS_BLEND_HAS_WRITEABLE_RT); +} + +static void gen8_emit_viewport_state_pointers_cc(struct intel_batchbuffer *batch) +{ + unsigned offset; + + offset = intel_batch_state_offset(batch, 32); + + OUT_STATE((uint32_t)0.0f); /* Minimum depth */ + OUT_STATE((uint32_t)0.0f); /* Maximum depth */ + + OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2)); + OUT_BATCH_STATE_OFFSET(offset); +} + +static void gen8_emit_viewport_state_pointers_sf_clip(struct intel_batchbuffer *batch) +{ + unsigned offset; + int i; + + offset = intel_batch_state_offset(batch, 64); + + for (i = 0; i < 16; i++) + OUT_STATE(0); + + OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP | (2 - 2)); + OUT_BATCH_STATE_OFFSET(offset); +} + +static void gen8_emit_primitive(struct intel_batchbuffer *batch) +{ + OUT_BATCH(GEN6_3DPRIMITIVE | (10-2)); + OUT_BATCH(4); /* gen8+ ignore the topology type field */ + OUT_BATCH(1); /* vertex count */ + OUT_BATCH(0); + OUT_BATCH(1); /* single instance */ + OUT_BATCH(0); /* start instance location */ + OUT_BATCH(0); /* index buffer offset, ignored */ + OUT_BATCH(0); /* extended parameter 0 */ + OUT_BATCH(0); /* extended parameter 1 */ + OUT_BATCH(0); /* extended parameter 2 */ +} + +static void gen9_emit_state_base_address(struct intel_batchbuffer *batch) { + const unsigned offset = 0; + OUT_BATCH(GEN6_STATE_BASE_ADDRESS | + (22 - 2) /* DWORD count - 2 */); + + /* general state base address - requires BB address + * added to state offset to be stored in this location + */ + OUT_RELOC(batch, 0, 0, offset | BASE_ADDRESS_MODIFY); + OUT_BATCH(0); + + /* stateless data port */ + OUT_BATCH(0); + + /* surface state base address - requires BB address + * added to state offset to be stored in this location + */ + OUT_RELOC(batch, 0, 0, offset | BASE_ADDRESS_MODIFY); + OUT_BATCH(0); + + /* dynamic state base address - requires BB address + * added to state offset to be stored in this location + */ + OUT_RELOC(batch, 0, 0, offset | 
BASE_ADDRESS_MODIFY); + OUT_BATCH(0); + + /* indirect state base address */ + OUT_BATCH(BASE_ADDRESS_MODIFY); + OUT_BATCH(0); + + /* instruction state base address - requires BB address + * added to state offset to be stored in this location + */ + OUT_RELOC(batch, 0, 0, offset | BASE_ADDRESS_MODIFY); + OUT_BATCH(0); + + /* general state buffer size */ + OUT_BATCH(GEN8_STATE_SIZE_PAGES(1) | BUFFER_SIZE_MODIFY); + /* dynamic state buffer size */ + OUT_BATCH(GEN8_STATE_SIZE_PAGES(1) | BUFFER_SIZE_MODIFY); + /* indirect object buffer size */ + OUT_BATCH(0x0 | BUFFER_SIZE_MODIFY); + /* instruction buffer size */ + OUT_BATCH(GEN8_STATE_SIZE_PAGES(1) | BUFFER_SIZE_MODIFY); + + /* bindless surface state base address */ + OUT_BATCH(BASE_ADDRESS_MODIFY); + OUT_BATCH(0); + /* bindless surface state size */ + OUT_BATCH(0); + + /* bindless sampler state base address */ + OUT_BATCH(BASE_ADDRESS_MODIFY); + OUT_BATCH(0); + /* bindless sampler state size */ + OUT_BATCH(0); +} + +/* + * Generate the batch buffer commands needed to initialize the 3D engine + * to its "golden state". 
+ */ +void gen10_setup_null_render_state(struct intel_batchbuffer *batch) +{ + int i; + + /* WaRsGatherPoolEnable: cnl */ + OUT_BATCH(GEN7_MI_RS_CONTROL); + +#define GEN8_PIPE_CONTROL_GLOBAL_GTT (1 << 24) + /* PIPE_CONTROL */ + OUT_BATCH(GEN6_PIPE_CONTROL | + (6 - 2)); /* DWORD count - 2 */ + OUT_BATCH(GEN8_PIPE_CONTROL_GLOBAL_GTT); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + + /* PIPELINE_SELECT */ + OUT_BATCH(GEN9_PIPELINE_SELECT | PIPELINE_SELECT_3D); + + OUT_BATCH(MI_LOAD_REGISTER_IMM); + OUT_BATCH(GEN8_REG_L3_CACHE_CONFIG); + OUT_BATCH(GEN10_L3_CACHE_CONFIG_VALUE); + + gen8_emit_wm(batch); + gen8_emit_ps(batch); + gen8_emit_sf(batch); + + OUT_CMD(GEN7_3DSTATE_SBE, 6); /* Check w/ Gen8 code */ + OUT_CMD(GEN8_3DSTATE_SBE_SWIZ, 11); + + gen8_emit_vs(batch); + gen8_emit_hs(batch); + + OUT_CMD(GEN7_3DSTATE_GS, 10); + OUT_CMD(GEN7_3DSTATE_STREAMOUT, 5); + OUT_CMD(GEN7_3DSTATE_DS, 11); /* Check w/ Gen8 code */ + OUT_CMD(GEN6_3DSTATE_CLIP, 4); + OUT_CMD(GEN7_3DSTATE_TE, 4); + OUT_CMD(GEN8_3DSTATE_VF, 2); + OUT_CMD(GEN8_3DSTATE_WM_HZ_OP, 5); + + /* URB States */ + gen10_emit_urb(batch); + + OUT_CMD(GEN10_3DSTATE_GATHER_CONSTANT_VS, 130); + OUT_CMD(GEN10_3DSTATE_GATHER_CONSTANT_HS, 130); + OUT_CMD(GEN10_3DSTATE_GATHER_CONSTANT_DS, 130); + OUT_CMD(GEN10_3DSTATE_GATHER_CONSTANT_GS, 130); + OUT_CMD(GEN10_3DSTATE_GATHER_CONSTANT_PS, 130); + + OUT_CMD(GEN8_3DSTATE_BIND_TABLE_POOL_ALLOC, 4); + OUT_CMD(GEN8_3DSTATE_GATHER_POOL_ALLOC, 4); + OUT_CMD(GEN8_3DSTATE_DX9_CONSTANT_BUFFER_POOL_ALLOC, 4); + + /* Push Constants */ + OUT_CMD(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS, 2); + OUT_CMD(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS, 2); + OUT_CMD(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS, 2); + OUT_CMD(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS, 2); + OUT_CMD(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS, 2); + + /* Constants */ + OUT_CMD(GEN6_3DSTATE_CONSTANT_VS, 11); + OUT_CMD(GEN7_3DSTATE_CONSTANT_HS, 11); + OUT_CMD(GEN7_3DSTATE_CONSTANT_DS, 11); + OUT_CMD(GEN7_3DSTATE_CONSTANT_GS, 11); + 
OUT_CMD(GEN7_3DSTATE_CONSTANT_PS, 11); + + OUT_CMD(GEN8_3DSTATE_VF_INSTANCING, 3); + OUT_CMD(GEN8_3DSTATE_VF_SGVS, 2); + gen8_emit_vf_topology(batch); + + /* Streamer out declaration list */ + gen8_emit_so_decl_list(batch); + + /* Streamer out buffers */ + for (i = 0; i < 4; i++) { + gen8_emit_so_buffer(batch, i); + } + + /* State base addresses */ + gen9_emit_state_base_address(batch); + + OUT_CMD(GEN6_STATE_SIP, 3); + OUT_CMD(GEN6_3DSTATE_DRAWING_RECTANGLE, 4); + OUT_CMD(GEN7_3DSTATE_DEPTH_BUFFER, 8); + + /* Chroma key */ + for (i = 0; i < 4; i++) { + gen8_emit_chroma_key(batch, i); + } + + OUT_CMD(GEN6_3DSTATE_LINE_STIPPLE, 3); + OUT_CMD(GEN6_3DSTATE_AA_LINE_PARAMS, 3); + OUT_CMD(GEN7_3DSTATE_STENCIL_BUFFER, 5); + OUT_CMD(GEN7_3DSTATE_HIER_DEPTH_BUFFER, 5); + OUT_CMD(GEN7_3DSTATE_CLEAR_PARAMS, 3); + OUT_CMD(GEN6_3DSTATE_MONOFILTER_SIZE, 2); + + /* WaPSRandomCSNotDone:cnl */ +#define GEN8_PIPE_CONTROL_STALL_ENABLE (1 << 20) + OUT_BATCH(GEN6_PIPE_CONTROL | (6 - 2)); + OUT_BATCH(GEN8_PIPE_CONTROL_STALL_ENABLE); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + + OUT_CMD(GEN8_3DSTATE_MULTISAMPLE, 2); + OUT_CMD(GEN8_3DSTATE_POLY_STIPPLE_OFFSET, 2); + OUT_CMD(GEN8_3DSTATE_POLY_STIPPLE_PATTERN, 1 + 32); + OUT_CMD(GEN8_3DSTATE_SAMPLER_PALETTE_LOAD0, 1 + 16); + OUT_CMD(GEN8_3DSTATE_SAMPLER_PALETTE_LOAD1, 1 + 16); + OUT_CMD(GEN6_3DSTATE_INDEX_BUFFER, 5); + + /* Vertex buffers */ + gen8_emit_vertex_buffers(batch); + gen8_emit_vertex_elements(batch); + + OUT_BATCH(GEN6_3DSTATE_VF_STATISTICS | 1 /* Enable */); + + /* 3D state binding table pointers */ + OUT_CMD(GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS, 2); + OUT_CMD(GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS, 2); + OUT_CMD(GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS, 2); + OUT_CMD(GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS, 2); + OUT_CMD(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS, 2); + + gen8_emit_cc_state_pointers(batch); + gen8_emit_blend_state_pointers(batch); + gen8_emit_ps_extra(batch); + gen8_emit_ps_blend(batch); + + /* 
3D state sampler state pointers */ + OUT_CMD(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS, 2); + OUT_CMD(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS, 2); + OUT_CMD(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS, 2); + OUT_CMD(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS, 2); + OUT_CMD(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS, 2); + + OUT_CMD(GEN6_3DSTATE_SCISSOR_STATE_POINTERS, 2); + + gen8_emit_viewport_state_pointers_cc(batch); + gen8_emit_viewport_state_pointers_sf_clip(batch); + + /* WaPSRandomCSNotDone:cnl */ +#define GEN8_PIPE_CONTROL_STALL_ENABLE (1 << 20) + OUT_BATCH(GEN6_PIPE_CONTROL | (6 - 2)); + OUT_BATCH(GEN8_PIPE_CONTROL_STALL_ENABLE); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + + gen8_emit_raster(batch); + + OUT_CMD(GEN10_3DSTATE_WM_DEPTH_STENCIL, 4); + OUT_CMD(GEN10_3DSTATE_WM_CHROMAKEY, 2); + + /* Launch 3D operation */ + gen8_emit_primitive(batch); + + /* WaRsGatherPoolEnable: cnl */ + OUT_BATCH(GEN7_MI_RS_CONTROL | GEN7_MI_RS_CONTROL_ENABLE); + OUT_BATCH(GEN10_3DSTATE_GATHER_POOL_ALLOC | (4 - 2)); + OUT_BATCH(GEN10_3DSTATE_GATHER_POOL_ENABLE); + OUT_BATCH(0); + OUT_BATCH(0xfffff << 12); + OUT_BATCH(GEN7_MI_RS_CONTROL); + OUT_CMD(GEN10_3DSTATE_GATHER_POOL_ALLOC, 4); + + OUT_BATCH(MI_BATCH_BUFFER_END); +} -- 1.9.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx