On Tue, May 06, 2014 at 04:39:01PM +0300, Mika Kuoppala wrote: > Generate valid (null) render state for each gen. Output > it as a c source file with batch and relocations. > > Signed-off-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxx> With the GT3 URB allocation restriction added, this series is Acked-by: Damien Lespiau <damien.lespiau@xxxxxxxxx> Can't really promote that to a r-b tag in good faith. -- Damien > --- > configure.ac | 1 + > lib/gen6_render.h | 1 + > lib/gen7_render.h | 1 + > tools/Makefile.am | 4 +- > tools/null_state_gen/Makefile.am | 16 + > tools/null_state_gen/intel_batchbuffer.c | 173 ++++++ > tools/null_state_gen/intel_batchbuffer.h | 91 +++ > tools/null_state_gen/intel_null_state_gen.c | 151 +++++ > tools/null_state_gen/intel_renderstate_gen7.c | 505 ++++++++++++++++ > tools/null_state_gen/intel_renderstate_gen8.c | 764 +++++++++++++++++++++++++ > 10 files changed, 1706 insertions(+), 1 deletion(-) > create mode 100644 tools/null_state_gen/Makefile.am > create mode 100644 tools/null_state_gen/intel_batchbuffer.c > create mode 100644 tools/null_state_gen/intel_batchbuffer.h > create mode 100644 tools/null_state_gen/intel_null_state_gen.c > create mode 100644 tools/null_state_gen/intel_renderstate_gen7.c > create mode 100644 tools/null_state_gen/intel_renderstate_gen8.c > > diff --git a/configure.ac b/configure.ac > index b71b100..b848ac3 100644 > --- a/configure.ac > +++ b/configure.ac > @@ -211,6 +211,7 @@ AC_CONFIG_FILES([ > tests/Makefile > tools/Makefile > tools/quick_dump/Makefile > + tools/null_state_gen/Makefile > debugger/Makefile > debugger/system_routine/Makefile > assembler/Makefile > diff --git a/lib/gen6_render.h b/lib/gen6_render.h > index 60dc93e..495cc2e 100644 > --- a/lib/gen6_render.h > +++ b/lib/gen6_render.h > @@ -152,6 +152,7 @@ > #define VB0_VERTEXDATA (0 << 20) > #define VB0_INSTANCEDATA (1 << 20) > #define VB0_BUFFER_PITCH_SHIFT 0 > +#define VB0_NULL_VERTEX_BUFFER (1 << 13) > > /* VERTEX_ELEMENT_STATE Structure */ > #define 
VE0_VERTEX_BUFFER_INDEX_SHIFT 26 /* for GEN6 */ > diff --git a/lib/gen7_render.h b/lib/gen7_render.h > index 1661d4c..992d839 100644 > --- a/lib/gen7_render.h > +++ b/lib/gen7_render.h > @@ -165,6 +165,7 @@ > #define GEN7_VB0_VERTEXDATA (0 << 20) > #define GEN7_VB0_INSTANCEDATA (1 << 20) > #define GEN7_VB0_BUFFER_PITCH_SHIFT 0 > +#define GEN7_VB0_NULL_VERTEX_BUFFER (1 << 13) > #define GEN7_VB0_ADDRESS_MODIFY_ENABLE (1 << 14) > > /* VERTEX_ELEMENT_STATE Structure */ > diff --git a/tools/Makefile.am b/tools/Makefile.am > index 151092b..64fa060 100644 > --- a/tools/Makefile.am > +++ b/tools/Makefile.am > @@ -1,7 +1,9 @@ > include Makefile.sources > > +SUBDIRS = null_state_gen > + > if HAVE_DUMPER > -SUBDIRS = quick_dump > +SUBDIRS += quick_dump > endif > > AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/lib > diff --git a/tools/null_state_gen/Makefile.am b/tools/null_state_gen/Makefile.am > new file mode 100644 > index 0000000..40d2237 > --- /dev/null > +++ b/tools/null_state_gen/Makefile.am > @@ -0,0 +1,16 @@ > +bin_PROGRAMS = intel_null_state_gen > + > +intel_null_state_gen_SOURCES = \ > + intel_batchbuffer.c \ > + intel_renderstate_gen6.c \ > + intel_renderstate_gen7.c \ > + intel_renderstate_gen8.c \ > + intel_null_state_gen.c > + > +gens := 6 7 8 > + > +h = /tmp/intel_renderstate_gen$$gen.c > +state_headers: intel_null_state_gen > + for gen in $(gens); do \ > + ./intel_null_state_gen $$gen >$(h) ;\ > + done > diff --git a/tools/null_state_gen/intel_batchbuffer.c b/tools/null_state_gen/intel_batchbuffer.c > new file mode 100644 > index 0000000..62e052a > --- /dev/null > +++ b/tools/null_state_gen/intel_batchbuffer.c > @@ -0,0 +1,173 @@ > +/************************************************************************** > + * > + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. > + * All Rights Reserved. > + * > + * Copyright 2014 Intel Corporation > + * All Rights Reserved. 
> + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the > + * "Software"), to deal in the Software without restriction, including > + * without limitation the rights to use, copy, modify, merge, publish, > + * distribute, sub license, and/or sell copies of the Software, and to > + * permit persons to whom the Software is furnished to do so, subject to > + * the following conditions: > + * > + * The above copyright notice and this permission notice (including the > + * next paragraph) shall be included in all copies or substantial portions > + * of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS > + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. > + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR > + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, > + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE > + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
> + * > + **************************************************************************/ > + > +#include <stdio.h> > +#include <string.h> > +#include <errno.h> > + > +#include "intel_batchbuffer.h" > + > +int intel_batch_reset(struct intel_batchbuffer *batch, > + void *p, > + uint32_t size, > + uint32_t off) > +{ > + batch->err = -EINVAL; > + batch->base = batch->base_ptr = p; > + batch->state_base = batch->state_ptr = p; > + > + if (off >= size || ALIGN(off, 4) != off) > + return -EINVAL; > + > + batch->size = size; > + > + batch->state_base = batch->state_ptr = &batch->base[off]; > + > + batch->num_relocs = 0; > + batch->err = 0; > + > + return batch->err; > +} > + > +uint32_t intel_batch_state_used(struct intel_batchbuffer *batch) > +{ > + return batch->state_ptr - batch->state_base; > +} > + > +uint32_t intel_batch_state_offset(struct intel_batchbuffer *batch) > +{ > + return batch->state_ptr - batch->base; > +} > + > +void *intel_batch_state_alloc(struct intel_batchbuffer *batch, > + uint32_t size, > + uint32_t align) > +{ > + uint32_t cur; > + uint32_t offset; > + > + if (batch->err) > + return NULL; > + > + cur = intel_batch_state_offset(batch); > + offset = ALIGN(cur, align); > + > + if (offset + size > batch->size) { > + batch->err = -ENOSPC; > + return NULL; > + } > + > + batch->state_ptr = batch->base + offset + size; > + > + memset(batch->base + cur, 0, size); > + > + return batch->base + offset; > +} > + > +int intel_batch_offset(struct intel_batchbuffer *batch, const void *ptr) > +{ > + return (uint8_t *)ptr - batch->base; > +} > + > +int intel_batch_state_copy(struct intel_batchbuffer *batch, > + const void *ptr, > + const uint32_t size, > + const uint32_t align) > +{ > + void * const p = intel_batch_state_alloc(batch, size, align); > + > + if (p == NULL) > + return -1; > + > + return intel_batch_offset(batch, memcpy(p, ptr, size)); > +} > + > +uint32_t intel_batch_cmds_used(struct intel_batchbuffer *batch) > +{ > + return batch->base_ptr - batch->base; 
> +} > + > +uint32_t intel_batch_total_used(struct intel_batchbuffer *batch) > +{ > + return batch->state_ptr - batch->base; > +} > + > +static uint32_t intel_batch_space(struct intel_batchbuffer *batch) > +{ > + return batch->state_base - batch->base_ptr; > +} > + > +int intel_batch_emit_dword(struct intel_batchbuffer *batch, uint32_t dword) > +{ > + uint32_t offset; > + > + if (batch->err) > + return -1; > + > + if (intel_batch_space(batch) < 4) { > + batch->err = -ENOSPC; > + return -1; > + } > + > + offset = intel_batch_offset(batch, batch->base_ptr); > + > + *(uint32_t *) (batch->base_ptr) = dword; > + batch->base_ptr += 4; > + > + return offset; > +} > + > +int intel_batch_emit_reloc(struct intel_batchbuffer *batch, > + const uint32_t delta) > +{ > + uint32_t offset; > + > + if (batch->err) > + return -1; > + > + if (delta >= batch->size) { > + batch->err = -EINVAL; > + return -1; > + } > + > + offset = intel_batch_emit_dword(batch, delta); > + > + if (batch->err) > + return -1; > + > + if (batch->num_relocs >= MAX_RELOCS) { > + batch->err = -ENOSPC; > + return -1; > + } > + > + batch->relocs[batch->num_relocs++] = offset; > + > + return offset; > +} > diff --git a/tools/null_state_gen/intel_batchbuffer.h b/tools/null_state_gen/intel_batchbuffer.h > new file mode 100644 > index 0000000..f5c29db > --- /dev/null > +++ b/tools/null_state_gen/intel_batchbuffer.h > @@ -0,0 +1,91 @@ > +/************************************************************************** > + * > + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. > + * All Rights Reserved. > + * > + * Copyright 2014 Intel Corporation > + * All Rights Reserved. 
> + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the > + * "Software"), to deal in the Software without restriction, including > + * without limitation the rights to use, copy, modify, merge, publish, > + * distribute, sub license, and/or sell copies of the Software, and to > + * permit persons to whom the Software is furnished to do so, subject to > + * the following conditions: > + * > + * The above copyright notice and this permission notice (including the > + * next paragraph) shall be included in all copies or substantial portions > + * of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS > + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. > + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR > + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, > + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE > + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
> + * > + **************************************************************************/ > + > +#ifndef _INTEL_BATCHBUFFER_H > +#define _INTEL_BATCHBUFFER_H > + > +#include <stdint.h> > + > +#define MAX_RELOCS 64 > +#define ALIGN(x, y) (((x) + (y)-1) & ~((y)-1)) > + > +struct intel_batchbuffer { > + int err; > + uint8_t *base; > + uint8_t *base_ptr; > + uint8_t *state_base; > + uint8_t *state_ptr; > + int size; > + > + uint32_t relocs[MAX_RELOCS]; > + uint32_t num_relocs; > +}; > + > +#define OUT_BATCH(d) intel_batch_emit_dword(batch, d) > +#define OUT_RELOC(batch, read_domains, write_domain, delta) \ > + intel_batch_emit_reloc(batch, delta) > + > +int intel_batch_reset(struct intel_batchbuffer *batch, > + void *p, > + uint32_t size, uint32_t split_off); > + > +uint32_t intel_batch_state_used(struct intel_batchbuffer *batch); > + > +void *intel_batch_state_alloc(struct intel_batchbuffer *batch, > + uint32_t size, > + uint32_t align); > + > +int intel_batch_offset(struct intel_batchbuffer *batch, const void *ptr); > + > +int intel_batch_state_copy(struct intel_batchbuffer *batch, > + const void *ptr, > + const uint32_t size, > + const uint32_t align); > + > +uint32_t intel_batch_cmds_used(struct intel_batchbuffer *batch); > + > +int intel_batch_emit_dword(struct intel_batchbuffer *batch, uint32_t dword); > + > +int intel_batch_emit_reloc(struct intel_batchbuffer *batch, > + const uint32_t delta); > + > +uint32_t intel_batch_total_used(struct intel_batchbuffer *batch); > + > +static inline int intel_batch_error(struct intel_batchbuffer *batch) > +{ > + return batch->err; > +} > + > +static inline uint32_t intel_batch_state_start(struct intel_batchbuffer *batch) > +{ > + return batch->state_base - batch->base; > +} > + > +#endif > diff --git a/tools/null_state_gen/intel_null_state_gen.c b/tools/null_state_gen/intel_null_state_gen.c > new file mode 100644 > index 0000000..14f45d3 > --- /dev/null > +++ b/tools/null_state_gen/intel_null_state_gen.c > @@ -0,0 +1,151 @@ > 
+#include <stdio.h> > +#include <stdlib.h> > +#include <errno.h> > +#include <assert.h> > + > +#include "intel_batchbuffer.h" > + > +#define STATE_ALIGN 64 > + > +extern int gen6_setup_null_render_state(struct intel_batchbuffer *batch); > +extern int gen7_setup_null_render_state(struct intel_batchbuffer *batch); > +extern int gen8_setup_null_render_state(struct intel_batchbuffer *batch); > + > +static void print_usage(char *s) > +{ > + fprintf(stderr, "%s: <gen>\n" > + " gen: gen to generate for (6,7,8)\n", > + s); > +} > + > +static int is_reloc(struct intel_batchbuffer *batch, uint32_t offset) > +{ > + int i; > + > + for (i = 0; i < batch->num_relocs; i++) > + if (batch->relocs[i] == offset) > + return 1; > + > + return 0; > +} > + > +static int print_state(int gen, struct intel_batchbuffer *batch) > +{ > + int i; > + > + printf("#include \"intel_renderstate.h\"\n\n"); > + > + printf("static const u32 gen%d_null_state_relocs[] = {\n", gen); > + for (i = 0; i < batch->num_relocs; i++) { > + printf("\t0x%08x,\n", batch->relocs[i]); > + } > + printf("};\n\n"); > + > + printf("static const u32 gen%d_null_state_batch[] = {\n", gen); > + for (i = 0; i < batch->size; i += 4) { > + const uint32_t *p = (void *)batch->base + i; > + printf("\t0x%08x,", *p); > + > + if (i == intel_batch_cmds_used(batch) - 4) > + printf("\t /* cmds end */"); > + > + if (i == intel_batch_state_start(batch)) > + printf("\t /* state start */"); > + > + > + if (i == intel_batch_state_start(batch) + > + intel_batch_state_used(batch) - 4) > + printf("\t /* state end */"); > + > + if (is_reloc(batch, i)) > + printf("\t /* reloc */"); > + > + printf("\n"); > + } > + printf("};\n\nRO_RENDERSTATE(%d);\n", gen); > + > + return 0; > +} > + > +static int do_generate(int gen) > +{ > + int initial_size = 8192; > + struct intel_batchbuffer batch; > + void *p; > + int ret = -EINVAL; > + uint32_t cmd_len, state_len, size; > + int (*null_state_gen)(struct intel_batchbuffer *batch) = NULL; > + > + p = 
malloc(initial_size); > + if (p == NULL) > + return -ENOMEM; > + > + assert(ALIGN(initial_size/2, STATE_ALIGN) == initial_size/2); > + > + ret = intel_batch_reset(&batch, p, initial_size, initial_size/2); > + if (ret) > + goto out; > + > + switch (gen) { > + case 6: > + null_state_gen = gen6_setup_null_render_state; > + break; > + > + case 7: > + null_state_gen = gen7_setup_null_render_state; > + break; > + > + case 8: > + null_state_gen = gen8_setup_null_render_state; > + break; > + } > + > + if (null_state_gen == NULL) { > + printf("no generator found for %d\n", gen); > + ret = -EINVAL; > + goto out; > + } > + > + ret = null_state_gen(&batch); > + if (ret < 0) > + goto out; > + > + cmd_len = intel_batch_cmds_used(&batch); > + state_len = intel_batch_state_used(&batch); > + > + size = cmd_len + state_len + ALIGN(cmd_len, STATE_ALIGN) - cmd_len; > + > + ret = intel_batch_reset(&batch, p, size, ALIGN(cmd_len, STATE_ALIGN)); > + if (ret) > + goto out; > + > + ret = null_state_gen(&batch); > + if (ret < 0) > + goto out; > + > + assert(cmd_len == intel_batch_cmds_used(&batch)); > + assert(state_len == intel_batch_state_used(&batch)); > + assert(size == ret); > + > + /* Batch buffer needs to end */ > + assert(*(uint32_t *)(p + cmd_len - 4) == (0xA << 23)); > + > + ret = print_state(gen, &batch); > +out: > + free(p); > + > + if (ret < 0) > + return ret; > + > + return 0; > +} > + > +int main(int argc, char *argv[]) > +{ > + if (argc != 2) { > + print_usage(argv[0]); > + return 1; > + } > + > + return do_generate(atoi(argv[1])); > +} > diff --git a/tools/null_state_gen/intel_renderstate_gen7.c b/tools/null_state_gen/intel_renderstate_gen7.c > new file mode 100644 > index 0000000..8fe8a80 > --- /dev/null > +++ b/tools/null_state_gen/intel_renderstate_gen7.c > @@ -0,0 +1,505 @@ > +/* > + * Copyright © 2014 Intel Corporation > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the 
"Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice (including the next > + * paragraph) shall be included in all copies or substantial portions of the > + * Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS > + * IN THE SOFTWARE. 
> + */ > + > + > +#include "intel_batchbuffer.h" > +#include <lib/gen7_render.h> > +#include <lib/intel_reg.h> > +#include <stdio.h> > + > +static const uint32_t ps_kernel[][4] = { > + { 0x0080005a, 0x2e2077bd, 0x000000c0, 0x008d0040 }, > + { 0x0080005a, 0x2e6077bd, 0x000000d0, 0x008d0040 }, > + { 0x02800031, 0x21801fa9, 0x008d0e20, 0x08840001 }, > + { 0x00800001, 0x2e2003bd, 0x008d0180, 0x00000000 }, > + { 0x00800001, 0x2e6003bd, 0x008d01c0, 0x00000000 }, > + { 0x00800001, 0x2ea003bd, 0x008d0200, 0x00000000 }, > + { 0x00800001, 0x2ee003bd, 0x008d0240, 0x00000000 }, > + { 0x05800031, 0x20001fa8, 0x008d0e20, 0x90031000 }, > +}; > + > +static uint32_t > +gen7_bind_buf_null(struct intel_batchbuffer *batch) > +{ > + uint32_t *ss; > + > + ss = intel_batch_state_alloc(batch, 8 * sizeof(*ss), 32); > + if (ss == NULL) > + return -1; > + > + ss[0] = 0; > + ss[1] = 0; > + ss[2] = 0; > + ss[3] = 0; > + ss[4] = 0; > + ss[5] = 0; > + ss[6] = 0; > + ss[7] = 0; > + > + return intel_batch_offset(batch, ss); > +} > + > +static void > +gen7_emit_vertex_elements(struct intel_batchbuffer *batch) > +{ > + OUT_BATCH(GEN7_3DSTATE_VERTEX_ELEMENTS | > + ((2 * (1 + 2)) + 1 - 2)); > + > + OUT_BATCH(0 << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID | > + GEN7_SURFACEFORMAT_R32G32B32A32_FLOAT << > + GEN7_VE0_FORMAT_SHIFT | > + 0 << GEN7_VE0_OFFSET_SHIFT); > + > + OUT_BATCH(GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_0_SHIFT | > + GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT | > + GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT | > + GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_3_SHIFT); > + > + /* x,y */ > + OUT_BATCH(0 << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID | > + GEN7_SURFACEFORMAT_R16G16_SSCALED << GEN7_VE0_FORMAT_SHIFT | > + 0 << GEN7_VE0_OFFSET_SHIFT); /* offsets vb in bytes */ > + OUT_BATCH(GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT | > + GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT | > + 
GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT | > + GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT); > + > + /* s,t */ > + OUT_BATCH(0 << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID | > + GEN7_SURFACEFORMAT_R16G16_SSCALED << GEN7_VE0_FORMAT_SHIFT | > + 4 << GEN7_VE0_OFFSET_SHIFT); /* offset vb in bytes */ > + OUT_BATCH(GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT | > + GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT | > + GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT | > + GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT); > +} > + > +static uint32_t > +gen7_create_vertex_buffer(struct intel_batchbuffer *batch) > +{ > + uint16_t *v; > + > + v = intel_batch_state_alloc(batch, 12*sizeof(*v), 8); > + if (v == NULL) > + return -1; > + > + v[0] = 0; > + v[1] = 0; > + v[2] = 0; > + v[3] = 0; > + > + v[4] = 0; > + v[5] = 0; > + v[6] = 0; > + v[7] = 0; > + > + v[8] = 0; > + v[9] = 0; > + v[10] = 0; > + v[11] = 0; > + > + return intel_batch_offset(batch, v); > +} > + > +static void gen7_emit_vertex_buffer(struct intel_batchbuffer *batch) > +{ > + uint32_t offset; > + > + offset = gen7_create_vertex_buffer(batch); > + > + OUT_BATCH(GEN7_3DSTATE_VERTEX_BUFFERS | (5 - 2)); > + OUT_BATCH(0 << GEN7_VB0_BUFFER_INDEX_SHIFT | > + GEN7_VB0_VERTEXDATA | > + GEN7_VB0_ADDRESS_MODIFY_ENABLE | > + GEN7_VB0_NULL_VERTEX_BUFFER | > + 4*2 << GEN7_VB0_BUFFER_PITCH_SHIFT); > + > + OUT_RELOC(batch, I915_GEM_DOMAIN_VERTEX, 0, offset); > + OUT_BATCH(~0); > + OUT_BATCH(0); > +} > + > +static uint32_t > +gen7_bind_surfaces(struct intel_batchbuffer *batch) > +{ > + uint32_t *binding_table; > + > + binding_table = intel_batch_state_alloc(batch, 8, 32); > + if (binding_table == NULL) > + return -1; > + > + binding_table[0] = gen7_bind_buf_null(batch); > + binding_table[1] = gen7_bind_buf_null(batch); > + > + return intel_batch_offset(batch, binding_table); > +} > + > +static void > +gen7_emit_binding_table(struct 
intel_batchbuffer *batch) > +{ > + OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2)); > + OUT_BATCH(gen7_bind_surfaces(batch)); > +} > + > +static void > +gen7_emit_drawing_rectangle(struct intel_batchbuffer *batch) > +{ > + OUT_BATCH(GEN7_3DSTATE_DRAWING_RECTANGLE | (4 - 2)); > + /* Purposedly set min > max for null rectangle */ > + OUT_BATCH(0xffffffff); > + OUT_BATCH(0 | 0); > + OUT_BATCH(0); > +} > + > +static uint32_t > +gen7_create_blend_state(struct intel_batchbuffer *batch) > +{ > + struct gen7_blend_state *blend; > + > + blend = intel_batch_state_alloc(batch, sizeof(*blend), 64); > + if (blend == NULL) > + return -1; > + > + blend->blend0.dest_blend_factor = GEN7_BLENDFACTOR_ZERO; > + blend->blend0.source_blend_factor = GEN7_BLENDFACTOR_ONE; > + blend->blend0.blend_func = GEN7_BLENDFUNCTION_ADD; > + blend->blend1.post_blend_clamp_enable = 1; > + blend->blend1.pre_blend_clamp_enable = 1; > + > + return intel_batch_offset(batch, blend); > +} > + > +static void > +gen7_emit_state_base_address(struct intel_batchbuffer *batch) > +{ > + OUT_BATCH(GEN7_STATE_BASE_ADDRESS | (10 - 2)); > + OUT_BATCH(0); > + OUT_RELOC(batch, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); > + OUT_RELOC(batch, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); > + OUT_BATCH(0); > + OUT_RELOC(batch, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); > + > + OUT_BATCH(0); > + OUT_BATCH(0 | BASE_ADDRESS_MODIFY); > + OUT_BATCH(0); > + OUT_BATCH(0 | BASE_ADDRESS_MODIFY); > +} > + > +static uint32_t > +gen7_create_cc_viewport(struct intel_batchbuffer *batch) > +{ > + struct gen7_cc_viewport *vp; > + > + vp = intel_batch_state_alloc(batch, sizeof(*vp), 32); > + if (vp == NULL) > + return -1; > + > + vp->min_depth = -1.e35; > + vp->max_depth = 1.e35; > + > + return intel_batch_offset(batch, vp); > +} > + > +static void > +gen7_emit_cc(struct intel_batchbuffer *batch) > +{ > + OUT_BATCH(GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2)); > + 
OUT_BATCH(gen7_create_blend_state(batch)); > + > + OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2)); > + OUT_BATCH(gen7_create_cc_viewport(batch)); > +} > + > +static uint32_t > +gen7_create_sampler(struct intel_batchbuffer *batch) > +{ > + struct gen7_sampler_state *ss; > + > + ss = intel_batch_state_alloc(batch, sizeof(*ss), 32); > + if (ss == NULL) > + return -1; > + > + ss->ss0.min_filter = GEN7_MAPFILTER_NEAREST; > + ss->ss0.mag_filter = GEN7_MAPFILTER_NEAREST; > + > + ss->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_CLAMP; > + ss->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_CLAMP; > + ss->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_CLAMP; > + > + ss->ss3.non_normalized_coord = 1; > + > + return intel_batch_offset(batch, ss); > +} > + > +static void > +gen7_emit_sampler(struct intel_batchbuffer *batch) > +{ > + OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2)); > + OUT_BATCH(gen7_create_sampler(batch)); > +} > + > +static void > +gen7_emit_multisample(struct intel_batchbuffer *batch) > +{ > + OUT_BATCH(GEN7_3DSTATE_MULTISAMPLE | (4 - 2)); > + OUT_BATCH(GEN7_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER | > + GEN7_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */ > + OUT_BATCH(0); > + OUT_BATCH(0); > + > + OUT_BATCH(GEN7_3DSTATE_SAMPLE_MASK | (2 - 2)); > + OUT_BATCH(1); > +} > + > +static void > +gen7_emit_urb(struct intel_batchbuffer *batch) > +{ > + OUT_BATCH(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2)); > + OUT_BATCH(8); /* in 1KBs */ > + > + /* num of VS entries must be divisible by 8 if size < 9 */ > + OUT_BATCH(GEN7_3DSTATE_URB_VS | (2 - 2)); > + OUT_BATCH((64 << GEN7_URB_ENTRY_NUMBER_SHIFT) | > + (2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT | > + (1 << GEN7_URB_STARTING_ADDRESS_SHIFT)); > + > + OUT_BATCH(GEN7_3DSTATE_URB_HS | (2 - 2)); > + OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) | > + (2 << GEN7_URB_STARTING_ADDRESS_SHIFT)); > + > + OUT_BATCH(GEN7_3DSTATE_URB_DS | (2 - 2)); > + OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) | > + (2 << 
GEN7_URB_STARTING_ADDRESS_SHIFT)); > + > + OUT_BATCH(GEN7_3DSTATE_URB_GS | (2 - 2)); > + OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) | > + (1 << GEN7_URB_STARTING_ADDRESS_SHIFT)); > +} > + > +static void > +gen7_emit_vs(struct intel_batchbuffer *batch) > +{ > + OUT_BATCH(GEN7_3DSTATE_VS | (6 - 2)); > + OUT_BATCH(0); /* no VS kernel */ > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); /* pass-through */ > +} > + > +static void > +gen7_emit_hs(struct intel_batchbuffer *batch) > +{ > + OUT_BATCH(GEN7_3DSTATE_HS | (7 - 2)); > + OUT_BATCH(0); /* no HS kernel */ > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); /* pass-through */ > +} > + > +static void > +gen7_emit_te(struct intel_batchbuffer *batch) > +{ > + OUT_BATCH(GEN7_3DSTATE_TE | (4 - 2)); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > +} > + > +static void > +gen7_emit_ds(struct intel_batchbuffer *batch) > +{ > + OUT_BATCH(GEN7_3DSTATE_DS | (6 - 2)); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > +} > + > +static void > +gen7_emit_gs(struct intel_batchbuffer *batch) > +{ > + OUT_BATCH(GEN7_3DSTATE_GS | (7 - 2)); > + OUT_BATCH(0); /* no GS kernel */ > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); /* pass-through */ > +} > + > +static void > +gen7_emit_streamout(struct intel_batchbuffer *batch) > +{ > + OUT_BATCH(GEN7_3DSTATE_STREAMOUT | (3 - 2)); > + OUT_BATCH(0); > + OUT_BATCH(0); > +} > + > +static void > +gen7_emit_sf(struct intel_batchbuffer *batch) > +{ > + OUT_BATCH(GEN7_3DSTATE_SF | (7 - 2)); > + OUT_BATCH(0); > + OUT_BATCH(GEN7_3DSTATE_SF_CULL_NONE); > + OUT_BATCH(2 << GEN7_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > +} > + > +static void > +gen7_emit_sbe(struct intel_batchbuffer *batch) > +{ > + OUT_BATCH(GEN7_3DSTATE_SBE | (14 - 2)); > + OUT_BATCH(1 << GEN7_SBE_NUM_OUTPUTS_SHIFT | > + 1 << 
GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT | > + 1 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT); > + OUT_BATCH(0); > + OUT_BATCH(0); /* dw4 */ > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); /* dw8 */ > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); /* dw12 */ > + OUT_BATCH(0); > + OUT_BATCH(0); > +} > + > +static void > +gen7_emit_ps(struct intel_batchbuffer *batch) > +{ > + int threads; > + > +#if 0 /* XXX: Do we need separate state for hsw or not */ > + if (IS_HASWELL(batch->dev)) > + threads = 40 << HSW_PS_MAX_THREADS_SHIFT | > + 1 << HSW_PS_SAMPLE_MASK_SHIFT; > + else > +#endif > + threads = 40 << IVB_PS_MAX_THREADS_SHIFT; > + > + OUT_BATCH(GEN7_3DSTATE_PS | (8 - 2)); > + OUT_BATCH(intel_batch_state_copy(batch, ps_kernel, > + sizeof(ps_kernel), 64)); > + OUT_BATCH(1 << GEN7_PS_SAMPLER_COUNT_SHIFT | > + 2 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT); > + OUT_BATCH(0); /* scratch address */ > + OUT_BATCH(threads | > + GEN7_PS_16_DISPATCH_ENABLE | > + GEN7_PS_ATTRIBUTE_ENABLE); > + OUT_BATCH(6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0); > + OUT_BATCH(0); > + OUT_BATCH(0); > +} > + > +static void > +gen7_emit_clip(struct intel_batchbuffer *batch) > +{ > + OUT_BATCH(GEN7_3DSTATE_CLIP | (4 - 2)); > + OUT_BATCH(0); > + OUT_BATCH(0); /* pass-through */ > + OUT_BATCH(0); > + > + OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2)); > + OUT_BATCH(0); > +} > + > +static void > +gen7_emit_wm(struct intel_batchbuffer *batch) > +{ > + OUT_BATCH(GEN7_3DSTATE_WM | (3 - 2)); > + OUT_BATCH(GEN7_WM_DISPATCH_ENABLE | > + GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC); > + OUT_BATCH(0); > +} > + > +static void > +gen7_emit_null_depth_buffer(struct intel_batchbuffer *batch) > +{ > + OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2)); > + OUT_BATCH(GEN7_SURFACE_NULL << GEN7_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT | > + GEN7_DEPTHFORMAT_D32_FLOAT << > + GEN7_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT); > + OUT_BATCH(0); /* disable depth, stencil and hiz */ > + 
OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + > + OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2)); > + OUT_BATCH(0); > + OUT_BATCH(0); > +} > + > +int gen7_setup_null_render_state(struct intel_batchbuffer *batch) > +{ > + int ret; > + > + OUT_BATCH(GEN7_PIPELINE_SELECT | PIPELINE_SELECT_3D); > + > + gen7_emit_state_base_address(batch); > + gen7_emit_multisample(batch); > + gen7_emit_urb(batch); > + gen7_emit_vs(batch); > + gen7_emit_hs(batch); > + gen7_emit_te(batch); > + gen7_emit_ds(batch); > + gen7_emit_gs(batch); > + gen7_emit_clip(batch); > + gen7_emit_sf(batch); > + gen7_emit_wm(batch); > + gen7_emit_streamout(batch); > + gen7_emit_null_depth_buffer(batch); > + > + gen7_emit_cc(batch); > + gen7_emit_sampler(batch); > + gen7_emit_sbe(batch); > + gen7_emit_ps(batch); > + gen7_emit_vertex_elements(batch); > + gen7_emit_vertex_buffer(batch); > + gen7_emit_binding_table(batch); > + gen7_emit_drawing_rectangle(batch); > + > + OUT_BATCH(GEN7_3DPRIMITIVE | (7 - 2)); > + OUT_BATCH(GEN7_3DPRIMITIVE_VERTEX_SEQUENTIAL | _3DPRIM_RECTLIST); > + OUT_BATCH(3); > + OUT_BATCH(0); > + OUT_BATCH(1); /* single instance */ > + OUT_BATCH(0); /* start instance location */ > + OUT_BATCH(0); /* index buffer offset, ignored */ > + > + OUT_BATCH(MI_BATCH_BUFFER_END); > + > + ret = intel_batch_error(batch); > + if (ret == 0) > + ret = intel_batch_total_used(batch); > + > + return ret; > +} > diff --git a/tools/null_state_gen/intel_renderstate_gen8.c b/tools/null_state_gen/intel_renderstate_gen8.c > new file mode 100644 > index 0000000..7e22b24 > --- /dev/null > +++ b/tools/null_state_gen/intel_renderstate_gen8.c > @@ -0,0 +1,764 @@ > +#include "intel_batchbuffer.h" > +#include <lib/gen8_render.h> > +#include <lib/intel_reg.h> > +#include <string.h> > + > +struct { > + uint32_t cc_state; > + uint32_t blend_state; > +} cc; > + > +struct { > + uint32_t cc_state; > + uint32_t sf_clip_state; > +} viewport; > + > +/* see shaders/ps/blit.g7a */ > +static const uint32_t 
ps_kernel[][4] = { > +#if 1 > + { 0x0060005a, 0x21403ae8, 0x3a0000c0, 0x008d0040 }, > + { 0x0060005a, 0x21603ae8, 0x3a0000c0, 0x008d0080 }, > + { 0x0060005a, 0x21803ae8, 0x3a0000d0, 0x008d0040 }, > + { 0x0060005a, 0x21a03ae8, 0x3a0000d0, 0x008d0080 }, > + { 0x02800031, 0x2e0022e8, 0x0e000140, 0x08840001 }, > + { 0x05800031, 0x200022e0, 0x0e000e00, 0x90031000 }, > +#else > + /* Write all -1 */ > + { 0x00600001, 0x2e000608, 0x00000000, 0x3f800000 }, > + { 0x00600001, 0x2e200608, 0x00000000, 0x3f800000 }, > + { 0x00600001, 0x2e400608, 0x00000000, 0x3f800000 }, > + { 0x00600001, 0x2e600608, 0x00000000, 0x3f800000 }, > + { 0x00600001, 0x2e800608, 0x00000000, 0x3f800000 }, > + { 0x00600001, 0x2ea00608, 0x00000000, 0x3f800000 }, > + { 0x00600001, 0x2ec00608, 0x00000000, 0x3f800000 }, > + { 0x00600001, 0x2ee00608, 0x00000000, 0x3f800000 }, > + { 0x05800031, 0x200022e0, 0x0e000e00, 0x90031000 }, > +#endif > +}; > + > +static uint32_t > +gen8_bind_buf_null(struct intel_batchbuffer *batch) > +{ > + struct gen8_surface_state *ss; > + > + ss = intel_batch_state_alloc(batch, sizeof(*ss), 64); > + if (ss == NULL) > + return -1; > + > + memset(ss, 0, sizeof(*ss)); > + > + return intel_batch_offset(batch, ss); > +} > + > +static uint32_t > +gen8_bind_surfaces(struct intel_batchbuffer *batch) > +{ > + uint32_t *binding_table, offset; > + > + binding_table = intel_batch_state_alloc(batch, 8, 32); > + if (binding_table == NULL) > + return -1; > + > + offset = intel_batch_offset(batch, binding_table); > + > + binding_table[0] = > + gen8_bind_buf_null(batch); > + binding_table[1] = > + gen8_bind_buf_null(batch); > + > + return offset; > +} > + > +/* Mostly copy+paste from gen6, except wrap modes moved */ > +static uint32_t > +gen8_create_sampler(struct intel_batchbuffer *batch) { > + struct gen8_sampler_state *ss; > + uint32_t offset; > + > + ss = intel_batch_state_alloc(batch, sizeof(*ss), 64); > + if (ss == NULL) > + return -1; > + > + offset = intel_batch_offset(batch, ss); > + > + 
ss->ss0.min_filter = GEN6_MAPFILTER_NEAREST; > + ss->ss0.mag_filter = GEN6_MAPFILTER_NEAREST; > + ss->ss3.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP; > + ss->ss3.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP; > + ss->ss3.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP; > + > + /* I've experimented with non-normalized coordinates and using the LD > + * sampler fetch, but couldn't make it work. */ > + ss->ss3.non_normalized_coord = 0; > + > + return offset; > +} > + > +static uint32_t > +gen8_fill_ps(struct intel_batchbuffer *batch, > + const uint32_t kernel[][4], > + size_t size) > +{ > + return intel_batch_state_copy(batch, kernel, size, 64); > +} > + > +/** > + * gen7_fill_vertex_buffer_data populate vertex buffer with data. > + * > + * The vertex buffer consists of 3 vertices to construct a RECTLIST. The 4th > + * vertex is implied (automatically derived by the HW). Each element has the > + * destination offset, and the normalized texture offset (src). The rectangle > + * itself will span the entire subsurface to be copied. > + * > + * see gen6_emit_vertex_elements > + */ > +static uint32_t > +gen7_fill_vertex_buffer_data(struct intel_batchbuffer *batch) > +{ > + uint16_t *start; > + > + start = intel_batch_state_alloc(batch, 2 * sizeof(*start), 8); > + start[0] = 0; > + start[1] = 0; > + > + return intel_batch_offset(batch, start); > +} > + > +/** > + * gen6_emit_vertex_elements - The vertex elements describe the contents of the > + * vertex buffer. We pack the vertex buffer in a semi weird way, conforming to > + * what gen6_rendercopy did. The most straightforward would be to store > + * everything as floats. > + * > + * see gen7_fill_vertex_buffer_data() for where the corresponding elements are > + * packed. > + */ > +static void > +gen6_emit_vertex_elements(struct intel_batchbuffer *batch) { > + /* > + * The VUE layout > + * dword 0-3: pad (0, 0, 0. 
0) > + * dword 4-7: position (x, y, 0, 1.0), > + * dword 8-11: texture coordinate 0 (u0, v0, 0, 1.0) > + */ > + OUT_BATCH(GEN6_3DSTATE_VERTEX_ELEMENTS | (3 * 2 + 1 - 2)); > + > + /* Element state 0. These are 4 dwords of 0 required for the VUE format. > + * We don't really know or care what they do. > + */ > + OUT_BATCH(0 << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | > + GEN6_SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT | > + 0 << VE0_OFFSET_SHIFT); /* we specify 0, but it really does not exist */ > + OUT_BATCH(GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT | > + GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT | > + GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT | > + GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT); > + > + /* Element state 1 - Our "destination" vertices. These are passed down > + * through the pipeline, and eventually make it to the pixel shader as > + * the offsets in the destination surface. It's packed as the 16 > + * signed/scaled because of gen6 rendercopy. I see no particular reason > + * for doing this though. > + */ > + OUT_BATCH(0 << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | > + GEN6_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT | > + 0 << VE0_OFFSET_SHIFT); /* offsets vb in bytes */ > + OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT | > + GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT | > + GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT | > + GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT); > + > + /* Element state 2. Last but not least we store the U,V components as > + * normalized floats. These will be used in the pixel shader to sample > + * from the source buffer. 
> + */ > + OUT_BATCH(0 << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | > + GEN6_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT | > + 4 << VE0_OFFSET_SHIFT); /* offset vb in bytes */ > + OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT | > + GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT | > + GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT | > + GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT); > +} > + > +/** > + * gen7_emit_vertex_buffer emit the vertex buffers command > + * > + * @batch > + * @offset - byte offset within the @batch where the vertex buffer starts. > + */ > +static void gen7_emit_vertex_buffer(struct intel_batchbuffer *batch, > + uint32_t offset) { > + OUT_BATCH(GEN6_3DSTATE_VERTEX_BUFFERS | (1 + (4 * 1) - 2)); > + OUT_BATCH(0 << VB0_BUFFER_INDEX_SHIFT | /* VB 0th index */ > + GEN7_VB0_BUFFER_ADDR_MOD_EN | /* Address Modify Enable */ > + VB0_NULL_VERTEX_BUFFER | > + 0 << VB0_BUFFER_PITCH_SHIFT); > + OUT_RELOC(batch, I915_GEM_DOMAIN_VERTEX, 0, offset); > + OUT_BATCH(0); > + OUT_BATCH(0); > +} > + > +static uint32_t > +gen6_create_cc_state(struct intel_batchbuffer *batch) > +{ > + struct gen6_color_calc_state *cc_state; > + uint32_t offset; > + > + cc_state = intel_batch_state_alloc(batch, sizeof(*cc_state), 64); > + if (cc_state == NULL) > + return -1; > + > + offset = intel_batch_offset(batch, cc_state); > + > + return offset; > +} > + > +static uint32_t > +gen8_create_blend_state(struct intel_batchbuffer *batch) > +{ > + struct gen8_blend_state *blend; > + int i; > + uint32_t offset; > + > + blend = intel_batch_state_alloc(batch, sizeof(*blend), 64); > + if (blend == NULL) > + return -1; > + > + offset = intel_batch_offset(batch, blend); > + > + for (i = 0; i < 16; i++) { > + blend->bs[i].dest_blend_factor = GEN6_BLENDFACTOR_ZERO; > + blend->bs[i].source_blend_factor = GEN6_BLENDFACTOR_ONE; > + blend->bs[i].color_blend_func = GEN6_BLENDFUNCTION_ADD; > + blend->bs[i].pre_blend_color_clamp = 1; > + 
blend->bs[i].color_buffer_blend = 0; > + } > + > + return offset; > +} > + > +static uint32_t > +gen6_create_cc_viewport(struct intel_batchbuffer *batch) > +{ > + struct gen6_cc_viewport *vp; > + uint32_t offset; > + > + vp = intel_batch_state_alloc(batch, sizeof(*vp), 32); > + if (vp == NULL) > + return -1; > + > + offset = intel_batch_offset(batch, vp); > + > + /* XXX I don't understand this */ > + vp->min_depth = -1.e35; > + vp->max_depth = 1.e35; > + > + return offset; > +} > + > +static uint32_t > +gen7_create_sf_clip_viewport(struct intel_batchbuffer *batch) { > + /* XXX these are likely not needed */ > + struct gen7_sf_clip_viewport *scv_state; > + uint32_t offset; > + > + scv_state = intel_batch_state_alloc(batch, sizeof(*scv_state), 64); > + if (scv_state == NULL) > + return -1; > + > + offset = intel_batch_offset(batch, scv_state); > + > + scv_state->guardband.xmin = 0; > + scv_state->guardband.xmax = 1.0f; > + scv_state->guardband.ymin = 0; > + scv_state->guardband.ymax = 1.0f; > + > + return offset; > +} > + > +static uint32_t > +gen6_create_scissor_rect(struct intel_batchbuffer *batch) > +{ > + struct gen6_scissor_rect *scissor; > + uint32_t offset; > + > + scissor = intel_batch_state_alloc(batch, sizeof(*scissor), 64); > + if (scissor == NULL) > + return -1; > + > + offset = intel_batch_offset(batch, scissor); > + > + return offset; > +} > + > +static void > +gen8_emit_sip(struct intel_batchbuffer *batch) { > + OUT_BATCH(GEN6_STATE_SIP | (3 - 2)); > + OUT_BATCH(0); > + OUT_BATCH(0); > +} > + > +static void > +gen7_emit_push_constants(struct intel_batchbuffer *batch) { > + OUT_BATCH(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS); > + OUT_BATCH(0); > + OUT_BATCH(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS); > + OUT_BATCH(0); > + OUT_BATCH(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS); > + OUT_BATCH(0); > + OUT_BATCH(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS); > + OUT_BATCH(0); > + OUT_BATCH(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS); > + OUT_BATCH(0); > +} > + > +static void > 
+gen8_emit_state_base_address(struct intel_batchbuffer *batch) { > + OUT_BATCH(GEN6_STATE_BASE_ADDRESS | (16 - 2)); > + > + /* general */ > + OUT_BATCH(0 | BASE_ADDRESS_MODIFY); > + OUT_BATCH(0); > + > + /* stateless data port */ > + OUT_BATCH(0 | BASE_ADDRESS_MODIFY); > + > + /* surface */ > + OUT_RELOC(batch, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY); > + OUT_BATCH(0); > + > + /* dynamic */ > + OUT_RELOC(batch, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, > + 0, BASE_ADDRESS_MODIFY); > + OUT_BATCH(0); > + > + /* indirect */ > + OUT_BATCH(0); > + OUT_BATCH(0); > + > + /* instruction */ > + OUT_RELOC(batch, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); > + OUT_BATCH(0); > + > + /* general state buffer size */ > + OUT_BATCH(0xfffff000 | 1); > + /* dynamic state buffer size */ > + OUT_BATCH(1 << 12 | 1); > + /* indirect object buffer size */ > + OUT_BATCH(0xfffff000 | 1); > + /* instruction buffer size */ > + OUT_BATCH(1 << 12 | 1); > +} > + > +static void > +gen7_emit_urb(struct intel_batchbuffer *batch) { > + /* XXX: Min valid values from mesa */ > + const int vs_entries = 64; > + const int vs_size = 2; > + const int vs_start = 2; > + > + OUT_BATCH(GEN7_3DSTATE_URB_VS); > + OUT_BATCH(vs_entries | ((vs_size - 1) << 16) | (vs_start << 25)); > + OUT_BATCH(GEN7_3DSTATE_URB_GS); > + OUT_BATCH(vs_start << 25); > + OUT_BATCH(GEN7_3DSTATE_URB_HS); > + OUT_BATCH(vs_start << 25); > + OUT_BATCH(GEN7_3DSTATE_URB_DS); > + OUT_BATCH(vs_start << 25); > +} > + > +static void > +gen8_emit_cc(struct intel_batchbuffer *batch) { > + OUT_BATCH(GEN7_3DSTATE_BLEND_STATE_POINTERS); > + OUT_BATCH(cc.blend_state | 1); > + > + OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS); > + OUT_BATCH(cc.cc_state | 1); > +} > + > +static void > +gen8_emit_multisample(struct intel_batchbuffer *batch) { > + OUT_BATCH(GEN8_3DSTATE_MULTISAMPLE); > + OUT_BATCH(0); > + > + OUT_BATCH(GEN6_3DSTATE_SAMPLE_MASK); > + OUT_BATCH(1); > +} > + > +static void > +gen8_emit_vs(struct intel_batchbuffer 
*batch) { > + OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS); > + OUT_BATCH(0); > + > + OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS); > + OUT_BATCH(0); > + > + OUT_BATCH(GEN6_3DSTATE_CONSTANT_VS | (11 - 2)); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + > + OUT_BATCH(GEN6_3DSTATE_VS | (9-2)); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > +} > + > +static void > +gen8_emit_hs(struct intel_batchbuffer *batch) { > + OUT_BATCH(GEN7_3DSTATE_CONSTANT_HS | (11 - 2)); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + > + OUT_BATCH(GEN7_3DSTATE_HS | (9-2)); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + > + OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS); > + OUT_BATCH(0); > + > + OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS); > + OUT_BATCH(0); > +} > + > +static void > +gen8_emit_gs(struct intel_batchbuffer *batch) { > + OUT_BATCH(GEN7_3DSTATE_CONSTANT_GS | (11 - 2)); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + > + OUT_BATCH(GEN7_3DSTATE_GS | (10-2)); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + > + OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS); > + OUT_BATCH(0); > + > + OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS); > + OUT_BATCH(0); > +} > + > +static void > +gen8_emit_ds(struct intel_batchbuffer *batch) { > + 
OUT_BATCH(GEN7_3DSTATE_CONSTANT_DS | (11 - 2)); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + > + OUT_BATCH(GEN7_3DSTATE_DS | (9-2)); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + > + OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS); > + OUT_BATCH(0); > + > + OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS); > + OUT_BATCH(0); > +} > + > +static void > +gen8_emit_wm_hz_op(struct intel_batchbuffer *batch) { > + OUT_BATCH(GEN8_3DSTATE_WM_HZ_OP | (5-2)); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > +} > + > +static void > +gen8_emit_null_state(struct intel_batchbuffer *batch) { > + gen8_emit_wm_hz_op(batch); > + gen8_emit_hs(batch); > + OUT_BATCH(GEN7_3DSTATE_TE | (4-2)); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + gen8_emit_gs(batch); > + gen8_emit_ds(batch); > + gen8_emit_vs(batch); > +} > + > +static void > +gen7_emit_clip(struct intel_batchbuffer *batch) { > + OUT_BATCH(GEN6_3DSTATE_CLIP | (4 - 2)); > + OUT_BATCH(0); > + OUT_BATCH(0); /* pass-through */ > + OUT_BATCH(0); > +} > + > +static void > +gen8_emit_sf(struct intel_batchbuffer *batch) > +{ > + int i; > + > + OUT_BATCH(GEN7_3DSTATE_SBE | (4 - 2)); > + OUT_BATCH(1 << GEN7_SBE_NUM_OUTPUTS_SHIFT | > + GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH | > + GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET | > + 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT | > + 1 << GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT); > + OUT_BATCH(0); > + OUT_BATCH(0); > + > + OUT_BATCH(GEN8_3DSTATE_SBE_SWIZ | (11 - 2)); > + for (i = 0; i < 8; i++) > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + > + OUT_BATCH(GEN8_3DSTATE_RASTER | (5 - 2)); > + OUT_BATCH(GEN8_RASTER_FRONT_WINDING_CCW | GEN8_RASTER_CULL_NONE); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + > + 
OUT_BATCH(GEN6_3DSTATE_SF | (4 - 2)); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > +} > + > +static void > +gen8_emit_ps(struct intel_batchbuffer *batch, uint32_t kernel) { > + const int max_threads = 63; > + > + OUT_BATCH(GEN6_3DSTATE_WM | (2 - 2)); > + OUT_BATCH(/* XXX: I don't understand the BARYCENTRIC stuff, but it > + * appears we need it to put our setup data in the place we > + * expect (g6, see below) */ > + GEN7_3DSTATE_PS_PERSPECTIVE_PIXEL_BARYCENTRIC); > + > + OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (11-2)); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + > + OUT_BATCH(GEN7_3DSTATE_PS | (12-2)); > + OUT_BATCH(kernel); > + OUT_BATCH(0); /* kernel hi */ > + OUT_BATCH(1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF | > + 2 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT); > + OUT_BATCH(0); /* scratch space stuff */ > + OUT_BATCH(0); /* scratch hi */ > + OUT_BATCH((max_threads - 1) << GEN8_3DSTATE_PS_MAX_THREADS_SHIFT | > + GEN6_3DSTATE_WM_16_DISPATCH_ENABLE); > + OUT_BATCH(6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT); > + OUT_BATCH(0); // kernel 1 > + OUT_BATCH(0); /* kernel 1 hi */ > + OUT_BATCH(0); // kernel 2 > + OUT_BATCH(0); /* kernel 2 hi */ > + > + OUT_BATCH(GEN8_3DSTATE_PS_BLEND | (2 - 2)); > + OUT_BATCH(GEN8_PS_BLEND_HAS_WRITEABLE_RT); > + > + OUT_BATCH(GEN8_3DSTATE_PS_EXTRA | (2 - 2)); > + OUT_BATCH(GEN8_PSX_PIXEL_SHADER_VALID | GEN8_PSX_ATTRIBUTE_ENABLE); > +} > + > +static void > +gen8_emit_depth(struct intel_batchbuffer *batch) { > + OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER | (8-2)); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + > + OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER | (5 - 2)); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + > + OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER | (5 - 2)); > + 
OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > +} > + > +static void > +gen7_emit_clear(struct intel_batchbuffer *batch) { > + OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS | (3-2)); > + OUT_BATCH(0); > + OUT_BATCH(1); // clear valid > +} > + > +static void > +gen6_emit_drawing_rectangle(struct intel_batchbuffer *batch) > +{ > + OUT_BATCH(GEN6_3DSTATE_DRAWING_RECTANGLE | (4 - 2)); > + OUT_BATCH(0xffffffff); > + OUT_BATCH(0 | 0); > + OUT_BATCH(0); > +} > + > +static void gen8_emit_vf_topology(struct intel_batchbuffer *batch) > +{ > + OUT_BATCH(GEN8_3DSTATE_VF_TOPOLOGY); > + OUT_BATCH(_3DPRIM_RECTLIST); > +} > + > +/* Vertex elements MUST be defined before this according to spec */ > +static void gen8_emit_primitive(struct intel_batchbuffer *batch, uint32_t offset) > +{ > + OUT_BATCH(GEN8_3DSTATE_VF_INSTANCING | (3 - 2)); > + OUT_BATCH(0); > + OUT_BATCH(0); > + > + OUT_BATCH(GEN6_3DPRIMITIVE | (7-2)); > + OUT_BATCH(0); /* gen8+ ignore the topology type field */ > + OUT_BATCH(3); /* vertex count */ > + OUT_BATCH(0); /* We're specifying this instead with offset in GEN6_3DSTATE_VERTEX_BUFFERS */ > + OUT_BATCH(1); /* single instance */ > + OUT_BATCH(0); /* start instance location */ > + OUT_BATCH(0); /* index buffer offset, ignored */ > +} > + > +int gen8_setup_null_render_state(struct intel_batchbuffer *batch) > +{ > + uint32_t ps_sampler_state, ps_kernel_off, ps_binding_table; > + uint32_t scissor_state; > + uint32_t vertex_buffer; > + uint32_t batch_end; > + int ret; > + > + ps_binding_table = gen8_bind_surfaces(batch); > + ps_sampler_state = gen8_create_sampler(batch); > + ps_kernel_off = gen8_fill_ps(batch, ps_kernel, sizeof(ps_kernel)); > + vertex_buffer = gen7_fill_vertex_buffer_data(batch); > + cc.cc_state = gen6_create_cc_state(batch); > + cc.blend_state = gen8_create_blend_state(batch); > + viewport.cc_state = gen6_create_cc_viewport(batch); > + viewport.sf_clip_state = gen7_create_sf_clip_viewport(batch); > + scissor_state = 
gen6_create_scissor_rect(batch); > + /* TODO: there is other state which isn't setup */ > + > + /* Start emitting the commands. The order roughly follows the mesa blorp > + * order */ > + OUT_BATCH(GEN6_PIPELINE_SELECT | PIPELINE_SELECT_3D); > + > + gen8_emit_sip(batch); > + > + gen7_emit_push_constants(batch); > + > + gen8_emit_state_base_address(batch); > + > + OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC); > + OUT_BATCH(viewport.cc_state); > + OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP); > + OUT_BATCH(viewport.sf_clip_state); > + > + gen7_emit_urb(batch); > + > + gen8_emit_cc(batch); > + > + gen8_emit_multisample(batch); > + > + gen8_emit_null_state(batch); > + > + OUT_BATCH(GEN7_3DSTATE_STREAMOUT | (5-2)); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + > + gen7_emit_clip(batch); > + > + gen8_emit_sf(batch); > + > + OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS); > + OUT_BATCH(ps_binding_table); > + > + OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS); > + OUT_BATCH(ps_sampler_state); > + > + gen8_emit_ps(batch, ps_kernel_off); > + > + OUT_BATCH(GEN6_3DSTATE_SCISSOR_STATE_POINTERS); > + OUT_BATCH(scissor_state); > + > + gen8_emit_depth(batch); > + > + gen7_emit_clear(batch); > + > + gen6_emit_drawing_rectangle(batch); > + > + gen7_emit_vertex_buffer(batch, vertex_buffer); > + gen6_emit_vertex_elements(batch); > + > + gen8_emit_vf_topology(batch); > + gen8_emit_primitive(batch, vertex_buffer); > + > + OUT_BATCH(MI_BATCH_BUFFER_END); > + > + ret = intel_batch_error(batch); > + if (ret == 0) > + ret = intel_batch_total_used(batch); > + > + return ret; > +} > -- > 1.7.9.5 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > http://lists.freedesktop.org/mailman/listinfo/intel-gfx _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx