gen8_render_writefunc does only constant writes to the framebuffer, no texture reads. Used for testing write bandwidth. gen8_render_readfunc does only reads from the sampler and discards the result. Used for testing sampler read bandwidth. Alpha blend tests and support for more Gens still to come. Signed-off-by: Antti Koskipaa <antti.koskipaa@xxxxxxxxxxxxxxx> --- lib/gen8_render.h | 3 ++ lib/intel_batchbuffer.c | 40 +++++++++++++++++++++++++++ lib/intel_batchbuffer.h | 2 ++ lib/rendercopy.h | 12 ++++++++ lib/rendercopy_gen8.c | 53 +++++++++++++++++++++++++++++++---- shaders/ps/discard.g7a | 73 +++++++++++++++++++++++++++++++++++++++++++++++++ shaders/ps/fill.g7a | 6 ++++ 7 files changed, 184 insertions(+), 5 deletions(-) create mode 100644 shaders/ps/discard.g7a create mode 100644 shaders/ps/fill.g7a diff --git a/lib/gen8_render.h b/lib/gen8_render.h index ba3f9f2..610a457 100644 --- a/lib/gen8_render.h +++ b/lib/gen8_render.h @@ -60,6 +60,9 @@ #define GEN8_3DSTATE_WM_DEPTH_STENCIL GEN6_3D(3, 0, 0x4e) #define GEN8_3DSTATE_PS_EXTRA GEN6_3D(3,0, 0x4f) # define GEN8_PSX_PIXEL_SHADER_VALID (1 << 31) +# define GEN8_PSX_DONT_WRITE_RT (1 << 30) +# define GEN8_PSX_OMASK_PRESENT (1 << 29) +# define GEN8_PSX_KILLS_PIXEL (1 << 28) # define GEN8_PSX_ATTRIBUTE_ENABLE (1 << 8) #define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC GEN6_3D(3, 0, 0x23) diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c index 692521f..e3cf622 100644 --- a/lib/intel_batchbuffer.c +++ b/lib/intel_batchbuffer.c @@ -748,6 +748,46 @@ igt_render_copyfunc_t igt_get_render_copyfunc(int devid) } /** + * igt_get_render_writefunc: + * @devid: pci device id + * + * Returns: + * + * The platform-specific render write function pointer for the device + * specified with @devid. Will return NULL when no render write function is + * implemented.
+ */ +igt_render_copyfunc_t igt_get_render_writefunc(int devid) +{ + igt_render_copyfunc_t copy = NULL; + + if (IS_GEN8(devid)) + copy = gen8_render_writefunc; + + return copy; +} + +/** + * igt_get_render_readfunc: + * @devid: pci device id + * + * Returns: + * + * The platform-specific render read function pointer for the device + * specified with @devid. Will return NULL when no render read function is + * implemented. + */ +igt_render_copyfunc_t igt_get_render_readfunc(int devid) +{ + igt_render_copyfunc_t copy = NULL; + + if (IS_GEN8(devid)) + copy = gen8_render_readfunc; + + return copy; +} + +/** * igt_get_media_fillfunc: * @devid: pci device id * diff --git a/lib/intel_batchbuffer.h b/lib/intel_batchbuffer.h index 869747d..7d8f990 100644 --- a/lib/intel_batchbuffer.h +++ b/lib/intel_batchbuffer.h @@ -274,6 +274,8 @@ typedef void (*igt_render_copyfunc_t)(struct intel_batchbuffer *batch, struct igt_buf *dst, unsigned dst_x, unsigned dst_y); igt_render_copyfunc_t igt_get_render_copyfunc(int devid); +igt_render_copyfunc_t igt_get_render_writefunc(int devid); +igt_render_copyfunc_t igt_get_render_readfunc(int devid); /** * igt_fillfunc_t: diff --git a/lib/rendercopy.h b/lib/rendercopy.h index fdc3cab..f4ec74b 100644 --- a/lib/rendercopy.h +++ b/lib/rendercopy.h @@ -53,3 +53,15 @@ void gen2_render_copyfunc(struct intel_batchbuffer *batch, struct igt_buf *src, unsigned src_x, unsigned src_y, unsigned width, unsigned height, struct igt_buf *dst, unsigned dst_x, unsigned dst_y); + +void gen8_render_writefunc(struct intel_batchbuffer *batch, + drm_intel_context *context, + struct igt_buf *src, unsigned src_x, unsigned src_y, + unsigned width, unsigned height, + struct igt_buf *dst, unsigned dst_x, unsigned dst_y); + +void gen8_render_readfunc(struct intel_batchbuffer *batch, + drm_intel_context *context, + struct igt_buf *src, unsigned src_x, unsigned src_y, + unsigned width, unsigned height, + struct igt_buf *dst, unsigned dst_x, unsigned dst_y); diff --git
a/lib/rendercopy_gen8.c b/lib/rendercopy_gen8.c index 4a9a283..b243ed7 100644 --- a/lib/rendercopy_gen8.c +++ b/lib/rendercopy_gen8.c @@ -71,6 +71,28 @@ static const uint32_t ps_kernel_copy[][4] = { #endif }; +/* see shaders/ps/discard.g7a */ +static const uint32_t ps_kernel_read[][4] = { + { 0x0060005a, 0x21403ae8, 0x3a0000c0, 0x008d0040 }, + { 0x0060005a, 0x21603ae8, 0x3a0000c0, 0x008d0080 }, + { 0x0060005a, 0x21803ae8, 0x3a0000d0, 0x008d0040 }, + { 0x0060005a, 0x21a03ae8, 0x3a0000d0, 0x008d0080 }, + { 0x02800031, 0x2e4022e8, 0x0e000140, 0x08840001 }, + { 0x00000001, 0x2e020e08, 0x08000000, 0x00000000 }, + { 0x00000001, 0x2e000e08, 0x08000000, 0x00000000 }, + { 0x00000001, 0x2e010e08, 0x08000000, 0x00000000 }, + { 0x05800031, 0x200022e0, 0x0e000e00, 0x920b1000 }, +}; + +/* see shaders/ps/fill.g7a */ +static const uint32_t ps_kernel_write[][4] = { + { 0x00800001, 0x2e003ee8, 0x38000000, 0x3f800000 }, + { 0x00800001, 0x2e403ee8, 0x38000000, 0x3f7d70a4 }, + { 0x00800001, 0x2e803ee8, 0x38000000, 0x3f000000 }, + { 0x00800001, 0x2ec03ee8, 0x38000000, 0x3dcccccd }, + { 0x05800031, 0x200022e0, 0x0e000e00, 0x90031000 }, +}; + /* AUB annotation support */ #define MAX_ANNOTATIONS 33 struct annotations_context { @@ -779,7 +801,7 @@ gen8_emit_sf(struct intel_batchbuffer *batch) } static void -gen8_emit_ps(struct intel_batchbuffer *batch, uint32_t kernel) { +gen8_emit_ps(struct intel_batchbuffer *batch, uint32_t kernel, bool kills_pixel) { const int max_threads = 63; OUT_BATCH(GEN6_3DSTATE_WM | (2 - 2)); @@ -819,7 +841,8 @@ gen8_emit_ps(struct intel_batchbuffer *batch, uint32_t kernel) { OUT_BATCH(GEN8_PS_BLEND_HAS_WRITEABLE_RT); OUT_BATCH(GEN8_3DSTATE_PS_EXTRA | (2 - 2)); - OUT_BATCH(GEN8_PSX_PIXEL_SHADER_VALID | GEN8_PSX_ATTRIBUTE_ENABLE); + OUT_BATCH(GEN8_PSX_PIXEL_SHADER_VALID | GEN8_PSX_ATTRIBUTE_ENABLE | + (kills_pixel ? 
(GEN8_PSX_KILLS_PIXEL | GEN8_PSX_DONT_WRITE_RT) : 0)); } static void @@ -925,7 +948,7 @@ static void _gen8_render_func(struct intel_batchbuffer *batch, struct igt_buf *src, unsigned src_x, unsigned src_y, unsigned width, unsigned height, struct igt_buf *dst, unsigned dst_x, unsigned dst_y, - const uint32_t ps_kernel[][4], int kernel_size) + const uint32_t ps_kernel[][4], int kernel_size, bool discard) { struct annotations_context aub_annotations; uint32_t ps_sampler_state, ps_kernel_off, ps_binding_table; @@ -1001,7 +1024,7 @@ static void _gen8_render_func(struct intel_batchbuffer *batch, OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS); OUT_BATCH(ps_sampler_state); - gen8_emit_ps(batch, ps_kernel_off); + gen8_emit_ps(batch, ps_kernel_off, discard); OUT_BATCH(GEN6_3DSTATE_SCISSOR_STATE_POINTERS); OUT_BATCH(scissor_state); @@ -1039,5 +1062,25 @@ void gen8_render_copyfunc(struct intel_batchbuffer *batch, struct igt_buf *dst, unsigned dst_x, unsigned dst_y) { _gen8_render_func(batch, context, src, src_x, src_y, width, height, - dst, dst_x, dst_y, ps_kernel_copy, sizeof(ps_kernel_copy)); + dst, dst_x, dst_y, ps_kernel_copy, sizeof(ps_kernel_copy), false); +} + +void gen8_render_writefunc(struct intel_batchbuffer *batch, + drm_intel_context *context, + struct igt_buf *src, unsigned src_x, unsigned src_y, + unsigned width, unsigned height, + struct igt_buf *dst, unsigned dst_x, unsigned dst_y) +{ + _gen8_render_func(batch, context, src, src_x, src_y, width, height, + dst, dst_x, dst_y, ps_kernel_write, sizeof(ps_kernel_write), false); +} + +void gen8_render_readfunc(struct intel_batchbuffer *batch, + drm_intel_context *context, + struct igt_buf *src, unsigned src_x, unsigned src_y, + unsigned width, unsigned height, + struct igt_buf *dst, unsigned dst_x, unsigned dst_y) +{ + _gen8_render_func(batch, context, src, src_x, src_y, width, height, + dst, dst_x, dst_y, ps_kernel_read, sizeof(ps_kernel_read), true); } diff --git a/shaders/ps/discard.g7a b/shaders/ps/discard.g7a 
new file mode 100644 index 0000000..eafbd60 --- /dev/null +++ b/shaders/ps/discard.g7a @@ -0,0 +1,73 @@ +/* Assemble with ".../intel-gen4asm/src/intel-gen4asm -g 7" */ + + +/* Move pixels into g10-g13. The pixel shader does not load what you want. It + * loads the input data for a plane function to calculate what you want. The + * following is boilerplate code to move our normalized texture coordinates + * (u,v) into g10-g13. It does this 4 subspans (16 pixels) at a time. + * + * This should do the same thing, but it doesn't work for some reason. + * pln(16) g10 g6<0,1,0>F g2<8,8,1>F { align1 }; + * pln(16) g12 g6.16<1>F g2<8,8,1>F { align1 }; + */ +/* U */ +pln (8) g10<1>F g6.0<0,1,0>F g2.0<8,8,1>F { align1 }; /* pixel 0-7 */ +pln (8) g11<1>F g6.0<0,1,0>F g4.0<8,8,1>F { align1 }; /* pixel 8-15 */ +/* V */ +pln (8) g12<1>F g6.16<0,1,0> g2.0<8,8,1>F { align1 }; /* pixel 0-7 */ +pln (8) g13<1>F g6.16<0,1,0> g4.0<8,8,1>F { align1 }; /* pixel 8-15 */ + + +/* Next we want the sampler to fetch the src texture (i.e. src buffer). This + * is done with a pretty simple send message. The output goes to g114, right + * after the message header in g112 that our final send message starts with. + * In intel-gen4asm, we should end up parsed by the following rule: + * predicate SEND execsize dst sendleadreg sndopr directsrcoperand instoptions + * + * Send message descriptor: + * 28:25 = message len = 4 // our 4 registers have 16 pixels + * 24:20 = response len = 8 // Each pixel is RGBA32, so we need 8 registers + * 19:19 = header present = 0 + * 18:17 = SIMD16 = 2 + * 16:12 = TYPE = 0 (regular sample) + * 11:08 = Sampler index = ignored/0 + * 7:0 = binding table index = src = 1 + * 0x8840001 + * + * Send message extra descriptor + * 5:5 = End of Thread = 0 + * 3:0 = Target Function ID = SFID_SAMPLER (2) + * 0x2 + */ + +send(16) g114 g10 0x2 0x8840001 { align1 }; + +/* Next discard the result.
This is done by using a send message to the pixel + * data port with all the output masks set to 0. These are in the message header, + * in dword g112.2. + */ +mov(1) g112.2<1>UD 0x00000000 { align1 }; + +/* Set pixel offsets in the header to 0 */ +mov(1) g112.0<1>UD 0 { align1 }; +mov(1) g112.1<1>UD 0 { align1 }; + +/* Send message descriptor: + * 28:25 = message len = 12 // 16 pixels RGBA32 + header + * 24:20 = response len = 0 + * 19:19 = header present = 1 + * 17:14 = message type = Render Target Write (12) + * 12:12 = Last Render Target Select = 1 + * 10:08 = Message Type = SIMD16 (0) + * 07:00 = Binding Table Index = dest = 0 + * 0x120B1000 + * + * Send message extra descriptor + * 5:5 = End of Thread = 1 + * 3:0 = Target Function ID = SFID_DP_RC (5) + * 0x25 + */ + +send(16) null g112 0x25 0x120B1000 { align1, EOT }; + +/* vim: set ft=c ts=4 sw=2 tw=80 et: */ diff --git a/shaders/ps/fill.g7a b/shaders/ps/fill.g7a new file mode 100644 index 0000000..89f130f --- /dev/null +++ b/shaders/ps/fill.g7a @@ -0,0 +1,6 @@ +mov (16) g112<1>F 1.0F { align1 }; +mov (16) g114<1>F 0.99F { align1 }; +mov (16) g116<1>F 0.5F { align1 }; +mov (16) g118<1>F 0.1F { align1 }; +send (16) null g112 0x25 0x10031000 { align1, EOT }; +/* <8,8,1>F render RT write SIMD16 LastRT Surface = 0 mlen 8 rlen 0 { align1 1H EOT }; */ -- 2.3.6 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx