>From Mesa. This imports a bit more the of brw_eu* infrastructure (which is going towards the right direction!) from mesa and the update is quite a significant improvement over what we had. I also verified that the changes that were done on the assembler old version of brw_disasm.c were already supported by the Mesa version, and indeed they were. Signed-off-by: Damien Lespiau <damien.lespiau at intel.com> --- assembler/Makefile.am | 4 +- assembler/{disasm.c => brw_disasm.c} | 660 +++++++++++++++++++++++----- assembler/brw_eu.h | 405 +++++++++++++++++ assembler/brw_reg.h | 808 ++++++++++++++++++++++++++++++++++ assembler/disasm-main.c | 20 +- assembler/gen4asm.h | 3 - 6 files changed, 1783 insertions(+), 117 deletions(-) rename assembler/{disasm.c => brw_disasm.c} (52%) create mode 100644 assembler/brw_eu.h create mode 100644 assembler/brw_reg.h diff --git a/assembler/Makefile.am b/assembler/Makefile.am index 5914356..8843e1a 100644 --- a/assembler/Makefile.am +++ b/assembler/Makefile.am @@ -11,6 +11,8 @@ gram.h: gram.c intel_gen4asm_SOURCES = \ brw_defines.h \ + brw_eu.h \ + brw_reg.h \ brw_structs.h \ gen4asm.h \ gram.y \ @@ -19,7 +21,7 @@ intel_gen4asm_SOURCES = \ $(NULL) intel_gen4disasm_SOURCES = \ - disasm.c disasm-main.c + brw_disasm.c disasm-main.c pkgconfigdir = $(libdir)/pkgconfig pkgconfig_DATA = intel-gen4asm.pc diff --git a/assembler/disasm.c b/assembler/brw_disasm.c similarity index 52% rename from assembler/disasm.c rename to assembler/brw_disasm.c index 9ebbeab..8eaeb78 100644 --- a/assembler/disasm.c +++ b/assembler/brw_disasm.c @@ -28,13 +28,9 @@ #include <stdarg.h> #include "gen4asm.h" -#include "brw_defines.h" +#include "brw_eu.h" -struct { - char *name; - int nsrc; - int ndst; -} opcode[128] = { +const struct opcode_desc opcode_descs[128] = { [BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1 }, [BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1 }, [BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1 }, @@ -48,13 +44,15 @@ struct { [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_MAD] = { .name = "mad", .nsrc = 3, .ndst = 1 }, [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 }, - [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1}, + [BRW_OPCODE_MATH] = { .name = "math", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 }, @@ -71,12 +69,12 @@ struct { [BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 }, [BRW_OPCODE_SENDC] = { .name = "sendc", .nsrc = 1, .ndst = 1 }, [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 }, - [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 0, .ndst = 0 }, [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 }, - [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 1, .ndst = 01 }, - [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 }, [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 }, - [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 }, [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 }, [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 }, [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 }, @@ -87,8 +85,9 @@ struct { [BRW_OPCODE_DO] = { .name = "do", .nsrc = 0, .ndst = 0 }, [BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 2, .ndst = 0 }, }; +static const struct opcode_desc *opcode = opcode_descs; -char *conditional_modifier[16] = { +static const char * const conditional_modifier[16] = { [BRW_CONDITIONAL_NONE] = "", [BRW_CONDITIONAL_Z] = ".e", [BRW_CONDITIONAL_NZ] = ".ne", @@ -101,17 +100,17 @@ char *conditional_modifier[16] = { [BRW_CONDITIONAL_U] = ".u", }; -char *negate[2] = { +static const char * const negate_op[2] = { [0] = "", [1] = "-", }; -char *_abs[2] = { +static const char * const _abs[2] = { [0] = "", [1] = "(abs)", }; -char *vert_stride[16] = { +static const char * const vert_stride[16] = { [0] = "0", [1] = "1", [2] = "2", @@ -122,7 +121,7 @@ char *vert_stride[16] = { [15] = "VxH", }; -char *width[8] = { +static const char * const width[8] = { [0] = "1", [1] = "2", [2] = "4", @@ -130,34 +129,41 @@ char *width[8] = { [4] = "16", }; -char *horiz_stride[4] = { +static const char * const horiz_stride[4] = { [0] = "0", [1] = "1", [2] = "2", [3] = "4" }; -char *chan_sel[4] = { +static const char * const chan_sel[4] = { [0] = "x", [1] = "y", [2] = "z", [3] = "w", }; -char *dest_condmod[16] = { -}; - -char *debug_ctrl[2] = { +static const char * const debug_ctrl[2] = { [0] = "", [1] = ".breakpoint" }; -char *saturate[2] = { +static const char * const saturate[2] = { [0] = "", [1] = ".sat" }; -char *exec_size[8] = { +static const char * const accwr[2] = { + [0] = "", + [1] = "AccWrEnable" +}; + +static const char * const wectrl[2] = { + [0] = "WE_normal", + [1] = "WE_all" +}; + +static const char * const exec_size[8] = { [0] = "1", [1] = "2", [2] = "4", @@ -166,12 +172,12 @@ char *exec_size[8] = { [5] = "32" }; -char *pred_inv[2] = { +static const char * const pred_inv[2] = { [0] = "+", [1] = "-" }; -char *pred_ctrl_align16[16] = { +static const char * const pred_ctrl_align16[16] = { [1] = "", [2] = ".x", [3] = ".y", @@ -181,7 +187,7 @@ char *pred_ctrl_align16[16] = { [7] = ".all4h", }; -char *pred_ctrl_align1[16] = { +static const char * const pred_ctrl_align1[16] = { [1] = "", [2] = ".anyv", [3] = ".allv", @@ -195,35 +201,36 @@ char *pred_ctrl_align1[16] = { [11] = ".all16h", }; -char *thread_ctrl[4] = { +static const char * const thread_ctrl[4] = { [0] = "", [2] = "switch" }; -char *compr_ctrl[4] = { +static const char * const compr_ctrl[4] = { [0] = "", [1] = "sechalf", [2] = "compr", + [3] = "compr4", }; -char *dep_ctrl[4] = { +static const char * const dep_ctrl[4] = { [0] = "", [1] = "NoDDClr", [2] = "NoDDChk", [3] = "NoDDClr,NoDDChk", }; -char *mask_ctrl[4] = { +static const char * const mask_ctrl[4] = { [0] = "", [1] = "nomask", }; -char *access_mode[2] = { +static const char * const access_mode[2] = { [0] = "align1", [1] = "align16", }; -char *reg_encoding[8] = { +static const char * const reg_encoding[8] = { [0] = "UD", [1] = "D", [2] = "UW", @@ -233,24 +240,24 @@ char *reg_encoding[8] = { [7] = "F" }; -char *imm_encoding[8] = { - [0] = "UD", - [1] = "D", - [2] = "UW", - [3] = "W", - [5] = "VF", - [6] = "V", - [7] = "F" +const int reg_type_size[8] = { + [0] = 4, + [1] = 4, + [2] = 2, + [3] = 2, + [4] = 1, + [5] = 1, + [7] = 4 }; -char *reg_file[4] = { +static const char * const reg_file[4] = { [0] = "A", [1] = "g", [2] = "m", [3] = "imm", }; -char *writemask[16] = { +static const char * const writemask[16] = { [0x0] = ".", [0x1] = ".x", [0x2] = ".y", @@ -269,12 +276,12 @@ char *writemask[16] = { [0xf] = "", }; -char *end_of_thread[2] = { +static const char * const end_of_thread[2] = { [0] = "", [1] = "EOT" }; -char *target_function[16] = { +static const char * const target_function[16] = { [BRW_SFID_NULL] = "null", [BRW_SFID_MATH] = "math", [BRW_SFID_SAMPLER] = "sampler", @@ -285,7 +292,37 @@ char *target_function[16] = { [BRW_SFID_THREAD_SPAWNER] = "thread_spawner" }; -char *math_function[16] = { +static const char * const target_function_gen6[16] = { + [BRW_SFID_NULL] = "null", + [BRW_SFID_MATH] = "math", + [BRW_SFID_SAMPLER] = "sampler", + [BRW_SFID_MESSAGE_GATEWAY] = "gateway", + [BRW_SFID_URB] = "urb", + [BRW_SFID_THREAD_SPAWNER] = "thread_spawner", + [GEN6_SFID_DATAPORT_SAMPLER_CACHE] = "sampler", + [GEN6_SFID_DATAPORT_RENDER_CACHE] = "render", + [GEN6_SFID_DATAPORT_CONSTANT_CACHE] = "const", + [GEN7_SFID_DATAPORT_DATA_CACHE] = "data" +}; + +static const char * const dp_rc_msg_type_gen6[16] = { + [BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ] = "OWORD block read", + [GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ] = "RT UNORM read", + [GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ] = "OWORD dual block read", + [GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ] = "media block read", + [GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ] = "OWORD unaligned block read", + [GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ] = "DWORD scattered read", + [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE] = "DWORD atomic write", + [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE] = "OWORD block write", + [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE] = "OWORD dual block write", + [GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE] = "media block write", + [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE] = "DWORD scattered write", + [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE] = "RT write", + [GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE] = "streamed VB write", + [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE] = "RT UNORMc write", +}; + +static const char * const math_function[16] = { [BRW_MATH_FUNCTION_INV] = "inv", [BRW_MATH_FUNCTION_LOG] = "log", [BRW_MATH_FUNCTION_EXP] = "exp", @@ -297,52 +334,57 @@ char *math_function[16] = { [BRW_MATH_FUNCTION_TAN] = "tan", [BRW_MATH_FUNCTION_POW] = "pow", [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER] = "intdivmod", - [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT] = "intmod", - [BRW_MATH_FUNCTION_INT_DIV_REMAINDER] = "intdiv", + [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT] = "intdiv", + [BRW_MATH_FUNCTION_INT_DIV_REMAINDER] = "intmod", }; -char *math_saturate[2] = { +static const char * const math_saturate[2] = { [0] = "", [1] = "sat" }; -char *math_signed[2] = { +static const char * const math_signed[2] = { [0] = "", [1] = "signed" }; -char *math_scalar[2] = { +static const char * const math_scalar[2] = { [0] = "", [1] = "scalar" }; -char *math_precision[2] = { +static const char * const math_precision[2] = { [0] = "", [1] = "partial_precision" }; -char *urb_swizzle[4] = { +static const char * const urb_opcode[2] = { + [0] = "urb_write", + [1] = "ff_sync", +}; + +static const char * const urb_swizzle[4] = { [BRW_URB_SWIZZLE_NONE] = "", [BRW_URB_SWIZZLE_INTERLEAVE] = "interleave", [BRW_URB_SWIZZLE_TRANSPOSE] = "transpose", }; -char *urb_allocate[2] = { +static const char * const urb_allocate[2] = { [0] = "", [1] = "allocate" }; -char *urb_used[2] = { +static const char * const urb_used[2] = { [0] = "", [1] = "used" }; -char *urb_complete[2] = { +static const char * const urb_complete[2] = { [0] = "", [1] = "complete" }; -char *sampler_target_format[4] = { +static const char * const sampler_target_format[4] = { [0] = "F", [2] = "UD", [3] = "D" @@ -351,20 +393,21 @@ char *sampler_target_format[4] = { static int column; -static int string (FILE *file, char *string) +static int string (FILE *file, const char *string) { fputs (string, file); column += strlen (string); return 0; } -static int format (FILE *f, char *format, ...) +static int format (FILE *f, const char *format, ...) { char buf[1024]; va_list args; va_start (args, format); vsnprintf (buf, sizeof (buf) - 1, format, args); + va_end (args); string (f, buf); return 0; } @@ -384,7 +427,8 @@ static int pad (FILE *f, int c) return 0; } -static int control (FILE *file, char *name, char *ctrl[], GLuint id, int *space) +static int control (FILE *file, const char *name, const char * const ctrl[], + GLuint id, int *space) { if (!ctrl[id]) { fprintf (file, "*** invalid %s value %d ", @@ -415,6 +459,11 @@ static int print_opcode (FILE *file, int id) static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr) { int err = 0; + + /* Clear the Compr4 instruction compression bit. */ + if (_reg_file == BRW_MESSAGE_REGISTER_FILE) + _reg_nr &= ~(1 << 7); + if (_reg_file == BRW_ARCHITECTURE_REGISTER_FILE) { switch (_reg_nr & 0xf0) { case BRW_ARF_NULL: @@ -426,6 +475,9 @@ static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr) case BRW_ARF_ACCUMULATOR: format (file, "acc%d", _reg_nr & 0x0f); break; + case BRW_ARF_FLAG: + format (file, "f%d", _reg_nr & 0x0f); + break; case BRW_ARF_MASK: format (file, "mask%d", _reg_nr & 0x0f); break; @@ -468,7 +520,8 @@ static int dest (FILE *file, struct brw_instruction *inst) if (err == -1) return 0; if (inst->bits1.da1.dest_subreg_nr) - format (file, ".%d", inst->bits1.da1.dest_subreg_nr); + format (file, ".%d", inst->bits1.da1.dest_subreg_nr / + reg_type_size[inst->bits1.da1.dest_reg_type]); format (file, "<%d>", inst->bits1.da1.dest_horiz_stride); err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da1.dest_reg_type, NULL); } @@ -476,7 +529,8 @@ static int dest (FILE *file, struct brw_instruction *inst) { string (file, "g[a0"); if (inst->bits1.ia1.dest_subreg_nr) - format (file, ".%d", inst->bits1.ia1.dest_subreg_nr); + format (file, ".%d", inst->bits1.ia1.dest_subreg_nr / + reg_type_size[inst->bits1.ia1.dest_reg_type]); if (inst->bits1.ia1.dest_indirect_offset) format (file, " %d", inst->bits1.ia1.dest_indirect_offset); string (file, "]"); @@ -492,7 +546,8 @@ static int dest (FILE *file, struct brw_instruction *inst) if (err == -1) return 0; if (inst->bits1.da16.dest_subreg_nr) - format (file, ".%d", inst->bits1.da16.dest_subreg_nr); + format (file, ".%d", inst->bits1.da16.dest_subreg_nr / + reg_type_size[inst->bits1.da16.dest_reg_type]); string (file, "<1>"); err |= control (file, "writemask", writemask, inst->bits1.da16.dest_writemask, NULL); err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da16.dest_reg_type, NULL); @@ -507,6 +562,28 @@ static int dest (FILE *file, struct brw_instruction *inst) return 0; } +static int dest_3src (FILE *file, struct brw_instruction *inst) +{ + int err = 0; + uint32_t reg_file; + + if (inst->bits1.da3src.dest_reg_file) + reg_file = BRW_MESSAGE_REGISTER_FILE; + else + reg_file = BRW_GENERAL_REGISTER_FILE; + + err |= reg (file, reg_file, inst->bits1.da3src.dest_reg_nr); + if (err == -1) + return 0; + if (inst->bits1.da3src.dest_subreg_nr) + format (file, ".%d", inst->bits1.da3src.dest_subreg_nr); + string (file, "<1>"); + err |= control (file, "writemask", writemask, inst->bits1.da3src.dest_writemask, NULL); + err |= control (file, "dest reg encoding", reg_encoding, BRW_REGISTER_TYPE_F, NULL); + + return 0; +} + static int src_align1_region (FILE *file, GLuint _vert_stride, GLuint _width, GLuint _horiz_stride) { @@ -526,14 +603,14 @@ static int src_da1 (FILE *file, GLuint type, GLuint _reg_file, GLuint reg_num, GLuint sub_reg_num, GLuint __abs, GLuint _negate) { int err = 0; - err |= control (file, "negate", negate, _negate, NULL); + err |= control (file, "negate", negate_op, _negate, NULL); err |= control (file, "abs", _abs, __abs, NULL); err |= reg (file, _reg_file, reg_num); if (err == -1) return 0; if (sub_reg_num) - format (file, ".%d", sub_reg_num); + format (file, ".%d", sub_reg_num / reg_type_size[type]); /* use formal style like spec */ src_align1_region (file, _vert_stride, _width, _horiz_stride); err |= control (file, "src reg encoding", reg_encoding, type, NULL); return err; @@ -552,7 +629,7 @@ static int src_ia1 (FILE *file, GLuint _vert_stride) { int err = 0; - err |= control (file, "negate", negate, _negate, NULL); + err |= control (file, "negate", negate_op, _negate, NULL); err |= control (file, "abs", _abs, __abs, NULL); string (file, "g[a0"); @@ -580,18 +657,120 @@ static int src_da16 (FILE *file, GLuint swz_w) { int err = 0; - err |= control (file, "negate", negate, _negate, NULL); + err |= control (file, "negate", negate_op, _negate, NULL); err |= control (file, "abs", _abs, __abs, NULL); err |= reg (file, _reg_file, _reg_nr); if (err == -1) return 0; if (_subreg_nr) - format (file, ".%d", _subreg_nr); + /* bit4 for subreg number byte addressing. Make this same meaning as + in da1 case, so output looks consistent. */ + format (file, ".%d", 16 / reg_type_size[_reg_type]); string (file, "<"); err |= control (file, "vert stride", vert_stride, _vert_stride, NULL); - string (file, ",1,1>"); + string (file, ",4,1>"); + /* + * Three kinds of swizzle display: + * identity - nothing printed + * 1->all - print the single channel + * 1->1 - print the mapping + */ + if (swz_x == BRW_CHANNEL_X && + swz_y == BRW_CHANNEL_Y && + swz_z == BRW_CHANNEL_Z && + swz_w == BRW_CHANNEL_W) + { + ; + } + else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w) + { + string (file, "."); + err |= control (file, "channel select", chan_sel, swz_x, NULL); + } + else + { + string (file, "."); + err |= control (file, "channel select", chan_sel, swz_x, NULL); + err |= control (file, "channel select", chan_sel, swz_y, NULL); + err |= control (file, "channel select", chan_sel, swz_z, NULL); + err |= control (file, "channel select", chan_sel, swz_w, NULL); + } err |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL); + return err; +} + +static int src0_3src (FILE *file, struct brw_instruction *inst) +{ + int err = 0; + GLuint swz_x = (inst->bits2.da3src.src0_swizzle >> 0) & 0x3; + GLuint swz_y = (inst->bits2.da3src.src0_swizzle >> 2) & 0x3; + GLuint swz_z = (inst->bits2.da3src.src0_swizzle >> 4) & 0x3; + GLuint swz_w = (inst->bits2.da3src.src0_swizzle >> 6) & 0x3; + + err |= control (file, "negate", negate_op, inst->bits1.da3src.src0_negate, NULL); + err |= control (file, "abs", _abs, inst->bits1.da3src.src0_abs, NULL); + + err |= reg (file, BRW_GENERAL_REGISTER_FILE, inst->bits2.da3src.src0_reg_nr); + if (err == -1) + return 0; + if (inst->bits2.da3src.src0_subreg_nr) + format (file, ".%d", inst->bits2.da3src.src0_subreg_nr); + string (file, "<4,1,1>"); + err |= control (file, "src da16 reg type", reg_encoding, + BRW_REGISTER_TYPE_F, NULL); + /* + * Three kinds of swizzle display: + * identity - nothing printed + * 1->all - print the single channel + * 1->1 - print the mapping + */ + if (swz_x == BRW_CHANNEL_X && + swz_y == BRW_CHANNEL_Y && + swz_z == BRW_CHANNEL_Z && + swz_w == BRW_CHANNEL_W) + { + ; + } + else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w) + { + string (file, "."); + err |= control (file, "channel select", chan_sel, swz_x, NULL); + } + else + { + string (file, "."); + err |= control (file, "channel select", chan_sel, swz_x, NULL); + err |= control (file, "channel select", chan_sel, swz_y, NULL); + err |= control (file, "channel select", chan_sel, swz_z, NULL); + err |= control (file, "channel select", chan_sel, swz_w, NULL); + } + return err; +} + +static int src1_3src (FILE *file, struct brw_instruction *inst) +{ + int err = 0; + GLuint swz_x = (inst->bits2.da3src.src1_swizzle >> 0) & 0x3; + GLuint swz_y = (inst->bits2.da3src.src1_swizzle >> 2) & 0x3; + GLuint swz_z = (inst->bits2.da3src.src1_swizzle >> 4) & 0x3; + GLuint swz_w = (inst->bits2.da3src.src1_swizzle >> 6) & 0x3; + GLuint src1_subreg_nr = (inst->bits2.da3src.src1_subreg_nr_low | + (inst->bits3.da3src.src1_subreg_nr_high << 2)); + + err |= control (file, "negate", negate_op, inst->bits1.da3src.src1_negate, + NULL); + err |= control (file, "abs", _abs, inst->bits1.da3src.src1_abs, NULL); + + err |= reg (file, BRW_GENERAL_REGISTER_FILE, + inst->bits3.da3src.src1_reg_nr); + if (err == -1) + return 0; + if (src1_subreg_nr) + format (file, ".%d", src1_subreg_nr); + string (file, "<4,1,1>"); + err |= control (file, "src da16 reg type", reg_encoding, + BRW_REGISTER_TYPE_F, NULL); /* * Three kinds of swizzle display: * identity - nothing printed @@ -622,6 +801,56 @@ static int src_da16 (FILE *file, } +static int src2_3src (FILE *file, struct brw_instruction *inst) +{ + int err = 0; + GLuint swz_x = (inst->bits3.da3src.src2_swizzle >> 0) & 0x3; + GLuint swz_y = (inst->bits3.da3src.src2_swizzle >> 2) & 0x3; + GLuint swz_z = (inst->bits3.da3src.src2_swizzle >> 4) & 0x3; + GLuint swz_w = (inst->bits3.da3src.src2_swizzle >> 6) & 0x3; + + err |= control (file, "negate", negate_op, inst->bits1.da3src.src2_negate, + NULL); + err |= control (file, "abs", _abs, inst->bits1.da3src.src2_abs, NULL); + + err |= reg (file, BRW_GENERAL_REGISTER_FILE, + inst->bits3.da3src.src2_reg_nr); + if (err == -1) + return 0; + if (inst->bits3.da3src.src2_subreg_nr) + format (file, ".%d", inst->bits3.da3src.src2_subreg_nr); + string (file, "<4,1,1>"); + err |= control (file, "src da16 reg type", reg_encoding, + BRW_REGISTER_TYPE_F, NULL); + /* + * Three kinds of swizzle display: + * identity - nothing printed + * 1->all - print the single channel + * 1->1 - print the mapping + */ + if (swz_x == BRW_CHANNEL_X && + swz_y == BRW_CHANNEL_Y && + swz_z == BRW_CHANNEL_Z && + swz_w == BRW_CHANNEL_W) + { + ; + } + else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w) + { + string (file, "."); + err |= control (file, "channel select", chan_sel, swz_x, NULL); + } + else + { + string (file, "."); + err |= control (file, "channel select", chan_sel, swz_x, NULL); + err |= control (file, "channel select", chan_sel, swz_y, NULL); + err |= control (file, "channel select", chan_sel, swz_z, NULL); + err |= control (file, "channel select", chan_sel, swz_w, NULL); + } + return err; +} + static int imm (FILE *file, GLuint type, struct brw_instruction *inst) { switch (type) { case BRW_REGISTER_TYPE_UD: @@ -771,7 +1000,45 @@ static int src1 (FILE *file, struct brw_instruction *inst) } } -int disasm (FILE *file, struct brw_instruction *inst) +int esize[6] = { + [0] = 1, + [1] = 2, + [2] = 4, + [3] = 8, + [4] = 16, + [5] = 32, +}; + +static int qtr_ctrl(FILE *file, struct brw_instruction *inst) +{ + int qtr_ctl = inst->header.compression_control; + int exec_size = esize[inst->header.execution_size]; + + if (exec_size == 8) { + switch (qtr_ctl) { + case 0: + string (file, " 1Q"); + break; + case 1: + string (file, " 2Q"); + break; + case 2: + string (file, " 3Q"); + break; + case 3: + string (file, " 4Q"); + break; + } + } else if (exec_size == 16){ + if (qtr_ctl < 2) + string (file, " 1H"); + else + string (file, " 2H"); + } + return 0; +} + +int brw_disasm (FILE *file, struct brw_instruction *inst, int gen) { int err = 0; int space = 0; @@ -779,7 +1046,7 @@ int disasm (FILE *file, struct brw_instruction *inst) if (inst->header.predicate_control) { string (file, "("); err |= control (file, "predicate inverse", pred_inv, inst->header.predicate_inverse, NULL); - format (file, "f%d", inst->bits2.da1.flag_reg_nr); + format (file, "f%d", gen >= 7 ? inst->bits2.da1.flag_reg_nr : 0); if (inst->bits2.da1.flag_subreg_nr) format (file, ".%d", inst->bits2.da1.flag_subreg_nr); if (inst->header.access_mode == BRW_ALIGN_1) @@ -795,42 +1062,106 @@ int disasm (FILE *file, struct brw_instruction *inst) err |= control (file, "saturate", saturate, inst->header.saturate, NULL); err |= control (file, "debug control", debug_ctrl, inst->header.debug_control, NULL); - if (inst->header.opcode != BRW_OPCODE_SEND && - inst->header.opcode != BRW_OPCODE_SENDC) + if (inst->header.opcode == BRW_OPCODE_MATH) { + string (file, " "); + err |= control (file, "function", math_function, + inst->header.destreg__conditionalmod, NULL); + } else if (inst->header.opcode != BRW_OPCODE_SEND && + inst->header.opcode != BRW_OPCODE_SENDC) { err |= control (file, "conditional modifier", conditional_modifier, inst->header.destreg__conditionalmod, NULL); + /* If we're using the conditional modifier, print which flags reg is + * used for it. Note that on gen6+, the embedded-condition SEL and + * control flow doesn't update flags. + */ + if (inst->header.destreg__conditionalmod && + (gen < 6 || (inst->header.opcode != BRW_OPCODE_SEL && + inst->header.opcode != BRW_OPCODE_IF && + inst->header.opcode != BRW_OPCODE_WHILE))) { + format (file, ".f%d", gen >= 7 ? inst->bits2.da1.flag_reg_nr : 0); + if (inst->bits2.da1.flag_subreg_nr) + format (file, ".%d", inst->bits2.da1.flag_subreg_nr); + } + } + if (inst->header.opcode != BRW_OPCODE_NOP) { string (file, "("); err |= control (file, "execution size", exec_size, inst->header.execution_size, NULL); string (file, ")"); } - if (inst->header.opcode == BRW_OPCODE_SEND || - inst->header.opcode == BRW_OPCODE_SENDC) + if (inst->header.opcode == BRW_OPCODE_SEND && gen < 6) format (file, " %d", inst->header.destreg__conditionalmod); - if (opcode[inst->header.opcode].ndst > 0) { - pad (file, 16); - err |= dest (file, inst); - } - if (opcode[inst->header.opcode].nsrc > 0) { - pad (file, 32); - err |= src0 (file, inst); - } - if (opcode[inst->header.opcode].nsrc > 1) { - pad (file, 48); - err |= src1 (file, inst); + if (opcode[inst->header.opcode].nsrc == 3) { + pad (file, 16); + err |= dest_3src (file, inst); + + pad (file, 32); + err |= src0_3src (file, inst); + + pad (file, 48); + err |= src1_3src (file, inst); + + pad (file, 64); + err |= src2_3src (file, inst); + } else { + if (opcode[inst->header.opcode].ndst > 0) { + pad (file, 16); + err |= dest (file, inst); + } else if (gen == 7 && (inst->header.opcode == BRW_OPCODE_ELSE || + inst->header.opcode == BRW_OPCODE_ENDIF || + inst->header.opcode == BRW_OPCODE_WHILE)) { + format (file, " %d", inst->bits3.break_cont.jip); + } else if (gen == 6 && (inst->header.opcode == BRW_OPCODE_IF || + inst->header.opcode == BRW_OPCODE_ELSE || + inst->header.opcode == BRW_OPCODE_ENDIF || + inst->header.opcode == BRW_OPCODE_WHILE)) { + format (file, " %d", inst->bits1.branch_gen6.jump_count); + } else if ((gen >= 6 && (inst->header.opcode == BRW_OPCODE_BREAK || + inst->header.opcode == BRW_OPCODE_CONTINUE || + inst->header.opcode == BRW_OPCODE_HALT)) || + (gen == 7 && inst->header.opcode == BRW_OPCODE_IF)) { + format (file, " %d %d", inst->bits3.break_cont.uip, inst->bits3.break_cont.jip); + } else if (inst->header.opcode == BRW_OPCODE_JMPI) { + format (file, " %d", inst->bits3.d); + } + + if (opcode[inst->header.opcode].nsrc > 0) { + pad (file, 32); + err |= src0 (file, inst); + } + if (opcode[inst->header.opcode].nsrc > 1) { + pad (file, 48); + err |= src1 (file, inst); + } } if (inst->header.opcode == BRW_OPCODE_SEND || inst->header.opcode == BRW_OPCODE_SENDC) { + enum brw_message_target target; + + if (gen >= 6) + target = inst->header.destreg__conditionalmod; + else if (gen == 5) + target = inst->bits2.send_gen5.sfid; + else + target = inst->bits3.generic.msg_target; + newline (file); pad (file, 16); space = 0; - err |= control (file, "target function", target_function, - inst->header.destreg__conditionalmod, &space); - switch (inst->header.destreg__conditionalmod) { + + if (gen >= 6) { + err |= control (file, "target function", target_function_gen6, + target, &space); + } else { + err |= control (file, "target function", target_function, + target, &space); + } + + switch (target) { case BRW_SFID_MATH: err |= control (file, "math function", math_function, inst->bits3.math.function, &space); @@ -844,24 +1175,98 @@ int disasm (FILE *file, struct brw_instruction *inst) inst->bits3.math.precision, &space); break; case BRW_SFID_SAMPLER: - format (file, " (%d, %d, ", - inst->bits3.sampler.binding_table_index, - inst->bits3.sampler.sampler); - err |= control (file, "sampler target format", sampler_target_format, - inst->bits3.sampler.return_format, NULL); - string (file, ")"); + if (gen >= 7) { + format (file, " (%d, %d, %d, %d)", + inst->bits3.sampler_gen7.binding_table_index, + inst->bits3.sampler_gen7.sampler, + inst->bits3.sampler_gen7.msg_type, + inst->bits3.sampler_gen7.simd_mode); + } else if (gen >= 5) { + format (file, " (%d, %d, %d, %d)", + inst->bits3.sampler_gen5.binding_table_index, + inst->bits3.sampler_gen5.sampler, + inst->bits3.sampler_gen5.msg_type, + inst->bits3.sampler_gen5.simd_mode); + } else if (0 /* FINISHME: is_g4x */) { + format (file, " (%d, %d)", + inst->bits3.sampler_g4x.binding_table_index, + inst->bits3.sampler_g4x.sampler); + } else { + format (file, " (%d, %d, ", + inst->bits3.sampler.binding_table_index, + inst->bits3.sampler.sampler); + err |= control (file, "sampler target format", + sampler_target_format, + inst->bits3.sampler.return_format, NULL); + string (file, ")"); + } + break; + case BRW_SFID_DATAPORT_READ: + if (gen >= 6) { + format (file, " (%d, %d, %d, %d)", + inst->bits3.gen6_dp.binding_table_index, + inst->bits3.gen6_dp.msg_control, + inst->bits3.gen6_dp.msg_type, + inst->bits3.gen6_dp.send_commit_msg); + } else if (gen >= 5 /* FINISHME: || is_g4x */) { + format (file, " (%d, %d, %d)", + inst->bits3.dp_read_gen5.binding_table_index, + inst->bits3.dp_read_gen5.msg_control, + inst->bits3.dp_read_gen5.msg_type); + } else { + format (file, " (%d, %d, %d)", + inst->bits3.dp_read.binding_table_index, + inst->bits3.dp_read.msg_control, + inst->bits3.dp_read.msg_type); + } break; + case BRW_SFID_DATAPORT_WRITE: - format (file, " (%d, %d, %d, %d)", - inst->bits3.dp_write.binding_table_index, - (inst->bits3.dp_write.last_render_target << 3) | - inst->bits3.dp_write.msg_control, - inst->bits3.dp_write.msg_type, - inst->bits3.dp_write.send_commit_msg); + if (gen >= 7) { + format (file, " ("); + + err |= control (file, "DP rc message type", + dp_rc_msg_type_gen6, + inst->bits3.gen7_dp.msg_type, &space); + + format (file, ", %d, %d, %d)", + inst->bits3.gen7_dp.binding_table_index, + inst->bits3.gen7_dp.msg_control, + inst->bits3.gen7_dp.msg_type); + } else if (gen == 6) { + format (file, " ("); + + err |= control (file, "DP rc message type", + dp_rc_msg_type_gen6, + inst->bits3.gen6_dp.msg_type, &space); + + format (file, ", %d, %d, %d, %d)", + inst->bits3.gen6_dp.binding_table_index, + inst->bits3.gen6_dp.msg_control, + inst->bits3.gen6_dp.msg_type, + inst->bits3.gen6_dp.send_commit_msg); + } else { + format (file, " (%d, %d, %d, %d)", + inst->bits3.dp_write.binding_table_index, + (inst->bits3.dp_write.last_render_target << 3) | + inst->bits3.dp_write.msg_control, + inst->bits3.dp_write.msg_type, + inst->bits3.dp_write.send_commit_msg); + } break; + case BRW_SFID_URB: - format (file, " %d", inst->bits3.urb.offset); + if (gen >= 5) { + format (file, " %d", inst->bits3.urb_gen5.offset); + } else { + format (file, " %d", inst->bits3.urb.offset); + } + space = 1; + if (gen >= 5) { + err |= control (file, "urb opcode", urb_opcode, + inst->bits3.urb_gen5.opcode, &space); + } err |= control (file, "urb swizzle", urb_swizzle, inst->bits3.urb.swizzle_control, &space); err |= control (file, "urb allocate", urb_allocate, @@ -873,27 +1278,62 @@ int disasm (FILE *file, struct brw_instruction *inst) break; case BRW_SFID_THREAD_SPAWNER: break; + case GEN7_SFID_DATAPORT_DATA_CACHE: + format (file, " (%d, %d, %d)", + inst->bits3.gen7_dp.binding_table_index, + inst->bits3.gen7_dp.msg_control, + inst->bits3.gen7_dp.msg_type); + break; + + default: - format (file, "unsupported target %d", inst->bits3.generic.msg_target); + format (file, "unsupported target %d", target); break; } if (space) string (file, " "); - format (file, "mlen %d", - inst->bits3.generic.msg_length); - format (file, " rlen %d", - inst->bits3.generic.response_length); + if (gen >= 5) { + format (file, "mlen %d", + inst->bits3.generic_gen5.msg_length); + format (file, " rlen %d", + inst->bits3.generic_gen5.response_length); + } else { + format (file, "mlen %d", + inst->bits3.generic.msg_length); + format (file, " rlen %d", + inst->bits3.generic.response_length); + } } pad (file, 64); if (inst->header.opcode != BRW_OPCODE_NOP) { string (file, "{"); space = 1; err |= control(file, "access mode", access_mode, inst->header.access_mode, &space); - err |= control (file, "mask control", mask_ctrl, inst->header.mask_control, &space); + if (gen >= 6) + err |= control (file, "write enable control", wectrl, inst->header.mask_control, &space); + else + err |= control (file, "mask control", mask_ctrl, inst->header.mask_control, &space); err |= control (file, "dependency control", dep_ctrl, inst->header.dependency_control, &space); - err |= control (file, "compression control", compr_ctrl, inst->header.compression_control, &space); + + if (gen >= 6) + err |= qtr_ctrl (file, inst); + else { + if (inst->header.compression_control == BRW_COMPRESSION_COMPRESSED && + opcode[inst->header.opcode].ndst > 0 && + inst->bits1.da1.dest_reg_file == BRW_MESSAGE_REGISTER_FILE && + inst->bits1.da1.dest_reg_nr & (1 << 7)) { + format (file, " compr4"); + } else { + err |= control (file, "compression control", compr_ctrl, + inst->header.compression_control, &space); + } + } + err |= control (file, "thread control", thread_ctrl, inst->header.thread_control, &space); - if (inst->header.opcode == BRW_OPCODE_SEND) + if (gen >= 6) + err |= control (file, "acc write control", accwr, inst->header.acc_wr_control, &space); + if (inst->header.opcode == BRW_OPCODE_SEND || + inst->header.opcode == BRW_OPCODE_SENDC) err |= control (file, "end of thread", end_of_thread, inst->bits3.generic.end_of_thread, &space); if (space) diff --git a/assembler/brw_eu.h b/assembler/brw_eu.h new file mode 100644 index 0000000..b7009ff --- /dev/null +++ b/assembler/brw_eu.h @@ -0,0 +1,405 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith at tungstengraphics.com> + */ + + +#ifndef BRW_EU_H +#define BRW_EU_H + +#include <stdbool.h> +#include "brw_structs.h" +#include "brw_defines.h" +#include "brw_reg.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define BRW_EU_MAX_INSN_STACK 5 + +struct brw_compile { + struct brw_instruction *store; + int store_size; + GLuint nr_insn; + unsigned int next_insn_offset; + + void *mem_ctx; + + /* Allow clients to push/pop instruction state: + */ + struct brw_instruction stack[BRW_EU_MAX_INSN_STACK]; + bool compressed_stack[BRW_EU_MAX_INSN_STACK]; + struct brw_instruction *current; + + GLuint flag_value; + bool single_program_flow; + bool compressed; + struct brw_context *brw; + + /* Control flow stacks: + * - if_stack contains IF and ELSE instructions which must be patched + * (and popped) once the matching ENDIF instruction is encountered. + * + * Just store the instruction pointer(an index). + */ + int *if_stack; + int if_stack_depth; + int if_stack_array_size; + + /** + * loop_stack contains the instruction pointers of the starts of loops which + * must be patched (and popped) once the matching WHILE instruction is + * encountered. + */ + int *loop_stack; + /** + * pre-gen6, the BREAK and CONT instructions had to tell how many IF/ENDIF + * blocks they were popping out of, to fix up the mask stack. This tracks + * the IF/ENDIF nesting in each current nested loop level. + */ + int *if_depth_in_loop; + int loop_stack_depth; + int loop_stack_array_size; +}; + +static inline struct brw_instruction *current_insn( struct brw_compile *p) +{ + return &p->store[p->nr_insn]; +} + +void brw_pop_insn_state( struct brw_compile *p ); +void brw_push_insn_state( struct brw_compile *p ); +void brw_set_mask_control( struct brw_compile *p, GLuint value ); +void brw_set_saturate( struct brw_compile *p, bool enable ); +void brw_set_access_mode( struct brw_compile *p, GLuint access_mode ); +void brw_set_compression_control(struct brw_compile *p, enum brw_compression c); +void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value ); +void brw_set_predicate_control( struct brw_compile *p, GLuint pc ); +void brw_set_predicate_inverse(struct brw_compile *p, bool predicate_inverse); +void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional ); +void brw_set_flag_reg(struct brw_compile *p, int reg, int subreg); +void brw_set_acc_write_control(struct brw_compile *p, GLuint value); + +void brw_init_compile(struct brw_context *, struct brw_compile *p, + void *mem_ctx); +void brw_dump_compile(struct brw_compile *p, FILE *out, int start, int end); +const GLuint *brw_get_program( struct brw_compile *p, GLuint *sz ); + +struct brw_instruction *brw_next_insn(struct brw_compile *p, GLuint opcode); +void brw_set_dest(struct brw_compile *p, struct brw_instruction *insn, + struct brw_reg dest); +void brw_set_src0(struct brw_compile *p, struct brw_instruction *insn, + struct brw_reg reg); + +void gen6_resolve_implied_move(struct brw_compile *p, + struct brw_reg *src, + GLuint msg_reg_nr); + +/* Helpers for regular instructions: + */ +#define ALU1(OP) \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0); + +#define ALU2(OP) \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0, \ + struct brw_reg src1); + +#define ALU3(OP) \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0, \ + struct brw_reg src1, \ + struct brw_reg src2); + +#define ROUND(OP) \ +void brw_##OP(struct brw_compile *p, struct brw_reg dest, struct brw_reg src0); + +ALU1(MOV) +ALU2(SEL) +ALU1(NOT) +ALU2(AND) +ALU2(OR) +ALU2(XOR) +ALU2(SHR) +ALU2(SHL) +ALU2(RSR) +ALU2(RSL) +ALU2(ASR) +ALU2(JMPI) +ALU2(ADD) +ALU2(AVG) +ALU2(MUL) +ALU1(FRC) +ALU1(RNDD) +ALU2(MAC) +ALU2(MACH) +ALU1(LZD) +ALU2(DP4) +ALU2(DPH) +ALU2(DP3) +ALU2(DP2) +ALU2(LINE) +ALU2(PLN) +ALU3(MAD) + +ROUND(RNDZ) +ROUND(RNDE) + +#undef ALU1 +#undef ALU2 +#undef ALU3 +#undef ROUND + + +/* Helpers for SEND instruction: + */ +void brw_set_sampler_message(struct brw_compile *p, + struct brw_instruction *insn, + GLuint binding_table_index, + GLuint sampler, + GLuint msg_type, + GLuint response_length, + GLuint msg_length, + GLuint header_present, + GLuint simd_mode, + GLuint return_format); + +void brw_set_dp_read_message(struct brw_compile *p, + struct brw_instruction *insn, + GLuint binding_table_index, + GLuint msg_control, + GLuint msg_type, + GLuint target_cache, + GLuint msg_length, + bool header_present, + GLuint response_length); + +void brw_set_dp_write_message(struct brw_compile *p, + struct brw_instruction *insn, + GLuint binding_table_index, + GLuint msg_control, + GLuint msg_type, + GLuint msg_length, + bool header_present, + GLuint last_render_target, + GLuint response_length, + GLuint end_of_thread, + GLuint send_commit_msg); + +void brw_urb_WRITE(struct brw_compile *p, + struct brw_reg dest, + GLuint msg_reg_nr, + struct brw_reg src0, + bool allocate, + bool used, + GLuint msg_length, + GLuint response_length, + bool eot, + bool writes_complete, + GLuint offset, + GLuint swizzle); + +void brw_ff_sync(struct brw_compile *p, + struct brw_reg dest, + GLuint msg_reg_nr, + struct brw_reg src0, + bool allocate, + GLuint response_length, + bool eot); + +void brw_svb_write(struct brw_compile *p, + struct brw_reg dest, + GLuint msg_reg_nr, + struct brw_reg src0, + GLuint binding_table_index, + bool send_commit_msg); + +void brw_fb_WRITE(struct brw_compile *p, + int dispatch_width, + GLuint msg_reg_nr, + struct brw_reg src0, + GLuint msg_control, + GLuint binding_table_index, + GLuint msg_length, + GLuint response_length, + bool eot, + bool header_present); + +void brw_SAMPLE(struct brw_compile *p, + struct brw_reg dest, + GLuint msg_reg_nr, + struct brw_reg src0, + GLuint binding_table_index, + GLuint sampler, + GLuint writemask, + GLuint msg_type, + GLuint response_length, + GLuint msg_length, + GLuint header_present, + GLuint simd_mode, + GLuint return_format); + +void brw_math( struct brw_compile *p, + struct brw_reg dest, + GLuint function, + GLuint msg_reg_nr, + struct brw_reg src, + GLuint data_type, + GLuint precision ); + +void brw_math2(struct brw_compile *p, + struct brw_reg dest, + GLuint function, + struct brw_reg src0, + struct brw_reg src1); + +void brw_oword_block_read(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg mrf, + uint32_t offset, + uint32_t bind_table_index); + +void brw_oword_block_read_scratch(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg mrf, + int num_regs, + GLuint offset); + +void brw_oword_block_write_scratch(struct brw_compile *p, + struct brw_reg mrf, + int num_regs, + GLuint offset); + +void brw_shader_time_add(struct brw_compile *p, + int mrf, + uint32_t surf_index); + +/* If/else/endif. Works by manipulating the execution flags on each + * channel. + */ +struct brw_instruction *brw_IF(struct brw_compile *p, + GLuint execute_size); +struct brw_instruction *gen6_IF(struct brw_compile *p, uint32_t conditional, + struct brw_reg src0, struct brw_reg src1); + +void brw_ELSE(struct brw_compile *p); +void brw_ENDIF(struct brw_compile *p); + +/* DO/WHILE loops: + */ +struct brw_instruction *brw_DO(struct brw_compile *p, + GLuint execute_size); + +struct brw_instruction *brw_WHILE(struct brw_compile *p); + +struct brw_instruction *brw_BREAK(struct brw_compile *p); +struct brw_instruction *brw_CONT(struct brw_compile *p); +struct brw_instruction *gen6_CONT(struct brw_compile *p); +struct brw_instruction *gen6_HALT(struct brw_compile *p); +/* Forward jumps: + */ +void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx); + + + +void brw_NOP(struct brw_compile *p); + +void brw_WAIT(struct brw_compile *p); + +/* Special case: there is never a destination, execution size will be + * taken from src0: + */ +void brw_CMP(struct brw_compile *p, + struct brw_reg dest, + GLuint conditional, + struct brw_reg src0, + struct brw_reg src1); + +/*********************************************************************** + * brw_eu_util.c: + */ + +void brw_copy_indirect_to_indirect(struct brw_compile *p, + struct brw_indirect dst_ptr, + struct brw_indirect src_ptr, + GLuint count); + +void brw_copy_from_indirect(struct brw_compile *p, + struct brw_reg dst, + struct brw_indirect ptr, + GLuint count); + +void brw_copy4(struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src, + GLuint count); + +void brw_copy8(struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src, + GLuint count); + +void brw_math_invert( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src); + +void brw_set_src1(struct brw_compile *p, + struct brw_instruction *insn, + struct brw_reg reg); + +void brw_set_uip_jip(struct brw_compile *p); + +uint32_t brw_swap_cmod(uint32_t cmod); + +/* brw_optimize.c */ +void brw_optimize(struct brw_compile *p); +void brw_remove_duplicate_mrf_moves(struct brw_compile *p); +void brw_remove_grf_to_mrf_moves(struct brw_compile *p); + +/* brw_disasm.c */ +struct opcode_desc { + char *name; + int nsrc; + int ndst; +}; + +extern const struct opcode_desc opcode_descs[128]; + +int brw_disasm (FILE *file, struct brw_instruction *inst, int gen); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/assembler/brw_reg.h b/assembler/brw_reg.h new file mode 100644 index 0000000..f225915 --- /dev/null +++ b/assembler/brw_reg.h @@ -0,0 +1,808 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith at tungstengraphics.com> + */ + +/** @file brw_reg.h + * + * This file defines struct brw_reg, which is our representation for EU + * registers. They're not a hardware specific format, just an abstraction + * that intends to capture the full flexibility of the hardware registers. + * + * The brw_eu_emit.c layer's brw_set_dest/brw_set_src[01] functions encode + * the abstract brw_reg type into the actual hardware instruction encoding. + */ + +#ifndef BRW_REG_H +#define BRW_REG_H + +#include <stdbool.h> +#include <assert.h> +#include "brw_defines.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** Number of general purpose registers (VS, WM, etc) */ +#define BRW_MAX_GRF 128 + +/** + * First GRF used for the MRF hack. + * + * On gen7, MRFs are no longer used, and contiguous GRFs are used instead. We + * haven't converted our compiler to be aware of this, so it asks for MRFs and + * brw_eu_emit.c quietly converts them to be accesses of the top GRFs. The + * register allocators have to be careful of this to avoid corrupting the "MRF"s + * with actual GRF allocations. + */ +#define GEN7_MRF_HACK_START 112 + +/** Number of message register file registers */ +#define BRW_MAX_MRF 16 + +#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6)) +#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3) + +#define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3) +#define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3) +#define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0) +#define BRW_SWIZZLE_YYYY BRW_SWIZZLE4(1,1,1,1) +#define BRW_SWIZZLE_ZZZZ BRW_SWIZZLE4(2,2,2,2) +#define BRW_SWIZZLE_WWWW BRW_SWIZZLE4(3,3,3,3) +#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1) + +static inline bool +brw_is_single_value_swizzle(int swiz) +{ + return (swiz == BRW_SWIZZLE_XXXX || + swiz == BRW_SWIZZLE_YYYY || + swiz == BRW_SWIZZLE_ZZZZ || + swiz == BRW_SWIZZLE_WWWW); +} + +#define BRW_WRITEMASK_X 0x1 +#define BRW_WRITEMASK_Y 0x2 +#define BRW_WRITEMASK_Z 0x4 +#define BRW_WRITEMASK_W 0x8 + +#define BRW_WRITEMASK_XY (BRW_WRITEMASK_X | BRW_WRITEMASK_Y) +#define BRW_WRITEMASK_XZ (BRW_WRITEMASK_X | BRW_WRITEMASK_Z) +#define BRW_WRITEMASK_XW (BRW_WRITEMASK_X | BRW_WRITEMASK_W) +#define BRW_WRITEMASK_YW (BRW_WRITEMASK_Y | BRW_WRITEMASK_W) +#define BRW_WRITEMASK_ZW (BRW_WRITEMASK_Z | BRW_WRITEMASK_W) +#define BRW_WRITEMASK_XYZ (BRW_WRITEMASK_X | BRW_WRITEMASK_Y | BRW_WRITEMASK_Z) +#define BRW_WRITEMASK_XYZW (BRW_WRITEMASK_X | BRW_WRITEMASK_Y | \ + BRW_WRITEMASK_Z | BRW_WRITEMASK_W) + +#define REG_SIZE (8*4) + +/* These aren't hardware structs, just something useful for us to pass around: + * + * Align1 operation has a lot of control over input ranges. Used in + * WM programs to implement shaders decomposed into "channel serial" + * or "structure of array" form: + */ +struct brw_reg { + unsigned type:4; + unsigned file:2; + unsigned nr:8; + unsigned subnr:5; /* :1 in align16 */ + unsigned negate:1; /* source only */ + unsigned abs:1; /* source only */ + unsigned vstride:4; /* source only */ + unsigned width:3; /* src only, align1 only */ + unsigned hstride:2; /* align1 only */ + unsigned address_mode:1; /* relative addressing, hopefully! */ + unsigned pad0:1; + + union { + struct { + unsigned swizzle:8; /* src only, align16 only */ + unsigned writemask:4; /* dest only, align16 only */ + int indirect_offset:10; /* relative addressing offset */ + unsigned pad1:10; /* two dwords total */ + } bits; + + float f; + int d; + unsigned ud; + } dw1; +}; + + +struct brw_indirect { + unsigned addr_subnr:4; + int addr_offset:10; + unsigned pad:18; +}; + + +static inline int +type_sz(unsigned type) +{ + switch(type) { + case BRW_REGISTER_TYPE_UD: + case BRW_REGISTER_TYPE_D: + case BRW_REGISTER_TYPE_F: + return 4; + case BRW_REGISTER_TYPE_HF: + case BRW_REGISTER_TYPE_UW: + case BRW_REGISTER_TYPE_W: + return 2; + case BRW_REGISTER_TYPE_UB: + case BRW_REGISTER_TYPE_B: + return 1; + default: + return 0; + } +} + +/** + * Construct a brw_reg. + * \param file one of the BRW_x_REGISTER_FILE values + * \param nr register number/index + * \param subnr register sub number + * \param type one of BRW_REGISTER_TYPE_x + * \param vstride one of BRW_VERTICAL_STRIDE_x + * \param width one of BRW_WIDTH_x + * \param hstride one of BRW_HORIZONTAL_STRIDE_x + * \param swizzle one of BRW_SWIZZLE_x + * \param writemask BRW_WRITEMASK_X/Y/Z/W bitfield + */ +static inline struct brw_reg +brw_reg(unsigned file, + unsigned nr, + unsigned subnr, + unsigned type, + unsigned vstride, + unsigned width, + unsigned hstride, + unsigned swizzle, + unsigned writemask) +{ + struct brw_reg reg; + if (file == BRW_GENERAL_REGISTER_FILE) + assert(nr < BRW_MAX_GRF); + else if (file == BRW_MESSAGE_REGISTER_FILE) + assert((nr & ~(1 << 7)) < BRW_MAX_MRF); + else if (file == BRW_ARCHITECTURE_REGISTER_FILE) + assert(nr <= BRW_ARF_TIMESTAMP); + + reg.type = type; + reg.file = file; + reg.nr = nr; + reg.subnr = subnr * type_sz(type); + reg.negate = 0; + reg.abs = 0; + reg.vstride = vstride; + reg.width = width; + reg.hstride = hstride; + reg.address_mode = BRW_ADDRESS_DIRECT; + reg.pad0 = 0; + + /* Could do better: If the reg is r5.3<0;1,0>, we probably want to + * set swizzle and writemask to W, as the lower bits of subnr will + * be lost when converted to align16. This is probably too much to + * keep track of as you'd want it adjusted by suboffset(), etc. + * Perhaps fix up when converting to align16? + */ + reg.dw1.bits.swizzle = swizzle; + reg.dw1.bits.writemask = writemask; + reg.dw1.bits.indirect_offset = 0; + reg.dw1.bits.pad1 = 0; + return reg; +} + +/** Construct float[16] register */ +static inline struct brw_reg +brw_vec16_reg(unsigned file, unsigned nr, unsigned subnr) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_16, + BRW_WIDTH_16, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + BRW_WRITEMASK_XYZW); +} + +/** Construct float[8] register */ +static inline struct brw_reg +brw_vec8_reg(unsigned file, unsigned nr, unsigned subnr) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_8, + BRW_WIDTH_8, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + BRW_WRITEMASK_XYZW); +} + +/** Construct float[4] register */ +static inline struct brw_reg +brw_vec4_reg(unsigned file, unsigned nr, unsigned subnr) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_4, + BRW_WIDTH_4, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + BRW_WRITEMASK_XYZW); +} + +/** Construct float[2] register */ +static inline struct brw_reg +brw_vec2_reg(unsigned file, unsigned nr, unsigned subnr) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_2, + BRW_WIDTH_2, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYXY, + BRW_WRITEMASK_XY); +} + +/** Construct float[1] register */ +static inline struct brw_reg +brw_vec1_reg(unsigned file, unsigned nr, unsigned subnr) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XXXX, + BRW_WRITEMASK_X); +} + + +static inline struct brw_reg +retype(struct brw_reg reg, unsigned type) +{ + reg.type = type; + return reg; +} + +static inline struct brw_reg +sechalf(struct brw_reg reg) +{ + if (reg.vstride) + reg.nr++; + return reg; +} + +static inline struct brw_reg +suboffset(struct brw_reg reg, unsigned delta) +{ + reg.subnr += delta * type_sz(reg.type); + return reg; +} + + +static inline struct brw_reg +offset(struct brw_reg reg, unsigned delta) +{ + reg.nr += delta; + return reg; +} + + +static inline struct brw_reg +byte_offset(struct brw_reg reg, unsigned bytes) +{ + unsigned newoffset = reg.nr * REG_SIZE + reg.subnr + bytes; + reg.nr = newoffset / REG_SIZE; + reg.subnr = newoffset % REG_SIZE; + return reg; +} + + +/** Construct unsigned word[16] register */ +static inline struct brw_reg +brw_uw16_reg(unsigned file, unsigned nr, unsigned subnr) +{ + return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +/** Construct unsigned word[8] register */ +static inline struct brw_reg +brw_uw8_reg(unsigned file, unsigned nr, unsigned subnr) +{ + return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +/** Construct unsigned word[1] register */ +static inline struct brw_reg +brw_uw1_reg(unsigned file, unsigned nr, unsigned subnr) +{ + return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +static inline struct brw_reg +brw_imm_reg(unsigned type) +{ + return brw_reg(BRW_IMMEDIATE_VALUE, + 0, + 0, + type, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + 0, + 0); +} + +/** Construct float immediate register */ +static inline struct brw_reg +brw_imm_f(float f) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F); + imm.dw1.f = f; + return imm; +} + +/** Construct integer immediate register */ +static inline struct brw_reg +brw_imm_d(int d) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D); + imm.dw1.d = d; + return imm; +} + +/** Construct uint immediate register */ +static inline struct brw_reg +brw_imm_ud(unsigned ud) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD); + imm.dw1.ud = ud; + return imm; +} + +/** Construct ushort immediate register */ +static inline struct brw_reg +brw_imm_uw(uint16_t uw) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW); + imm.dw1.ud = uw | (uw << 16); + return imm; +} + +/** Construct short immediate register */ +static inline struct brw_reg +brw_imm_w(int16_t w) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W); + imm.dw1.d = w | (w << 16); + return imm; +} + +/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type + * numbers alias with _V and _VF below: + */ + +/** Construct vector of eight signed half-byte values */ +static inline struct brw_reg +brw_imm_v(unsigned v) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_8; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = v; + return imm; +} + +/** Construct vector of four 8-bit float values */ +static inline struct brw_reg +brw_imm_vf(unsigned v) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_4; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = v; + return imm; +} + +#define VF_ZERO 0x0 +#define VF_ONE 0x30 +#define VF_NEG (1<<7) + +static inline struct brw_reg +brw_imm_vf4(unsigned v0, unsigned v1, unsigned v2, unsigned v3) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_4; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = ((v0 << 0) | (v1 << 8) | (v2 << 16) | (v3 << 24)); + return imm; +} + + +static inline struct brw_reg +brw_address(struct brw_reg reg) +{ + return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr); +} + +/** Construct float[1] general-purpose register */ +static inline struct brw_reg +brw_vec1_grf(unsigned nr, unsigned subnr) +{ + return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +/** Construct float[2] general-purpose register */ +static inline struct brw_reg +brw_vec2_grf(unsigned nr, unsigned subnr) +{ + return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +/** Construct float[4] general-purpose register */ +static inline struct brw_reg +brw_vec4_grf(unsigned nr, unsigned subnr) +{ + return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +/** Construct float[8] general-purpose register */ +static inline struct brw_reg +brw_vec8_grf(unsigned nr, unsigned subnr) +{ + return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + + +static inline struct brw_reg +brw_uw8_grf(unsigned nr, unsigned subnr) +{ + return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +static inline struct brw_reg +brw_uw16_grf(unsigned nr, unsigned subnr) +{ + return brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + + +/** Construct null register (usually used for setting condition codes) */ +static inline struct brw_reg +brw_null_reg(void) +{ + return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0); +} + +static inline struct brw_reg +brw_address_reg(unsigned subnr) +{ + return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_ADDRESS, subnr); +} + +/* If/else instructions break in align16 mode if writemask & swizzle + * aren't xyzw. This goes against the convention for other scalar + * regs: + */ +static inline struct brw_reg +brw_ip_reg(void) +{ + return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_IP, + 0, + BRW_REGISTER_TYPE_UD, + BRW_VERTICAL_STRIDE_4, /* ? */ + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XYZW, /* NOTE! */ + BRW_WRITEMASK_XYZW); /* NOTE! */ +} + +static inline struct brw_reg +brw_acc_reg(void) +{ + return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_ACCUMULATOR, 0); +} + +static inline struct brw_reg +brw_notification_1_reg(void) +{ + + return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_NOTIFICATION_COUNT, + 1, + BRW_REGISTER_TYPE_UD, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XXXX, + BRW_WRITEMASK_X); +} + + +static inline struct brw_reg +brw_flag_reg(int reg, int subreg) +{ + return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_FLAG + reg, subreg); +} + + +static inline struct brw_reg +brw_mask_reg(unsigned subnr) +{ + return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_MASK, subnr); +} + +static inline struct brw_reg +brw_message_reg(unsigned nr) +{ + assert((nr & ~(1 << 7)) < BRW_MAX_MRF); + return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0); +} + + +/* This is almost always called with a numeric constant argument, so + * make things easy to evaluate at compile time: + */ +static inline unsigned cvt(unsigned val) +{ + switch (val) { + case 0: return 0; + case 1: return 1; + case 2: return 2; + case 4: return 3; + case 8: return 4; + case 16: return 5; + case 32: return 6; + } + return 0; +} + +static inline struct brw_reg +stride(struct brw_reg reg, unsigned vstride, unsigned width, unsigned hstride) +{ + reg.vstride = cvt(vstride); + reg.width = cvt(width) - 1; + reg.hstride = cvt(hstride); + return reg; +} + + +static inline struct brw_reg +vec16(struct brw_reg reg) +{ + return stride(reg, 16,16,1); +} + +static inline struct brw_reg +vec8(struct brw_reg reg) +{ + return stride(reg, 8,8,1); +} + +static inline struct brw_reg +vec4(struct brw_reg reg) +{ + return stride(reg, 4,4,1); +} + +static inline struct brw_reg +vec2(struct brw_reg reg) +{ + return stride(reg, 2,2,1); +} + +static inline struct brw_reg +vec1(struct brw_reg reg) +{ + return stride(reg, 0,1,0); +} + + +static inline struct brw_reg +get_element(struct brw_reg reg, unsigned elt) +{ + return vec1(suboffset(reg, elt)); +} + +static inline struct brw_reg +get_element_ud(struct brw_reg reg, unsigned elt) +{ + return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_UD), elt)); +} + +static inline struct brw_reg +get_element_d(struct brw_reg reg, unsigned elt) +{ + return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_D), elt)); +} + + +static inline struct brw_reg +brw_swizzle(struct brw_reg reg, unsigned x, unsigned y, unsigned z, unsigned w) +{ + assert(reg.file != BRW_IMMEDIATE_VALUE); + + reg.dw1.bits.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(reg.dw1.bits.swizzle, x), + BRW_GET_SWZ(reg.dw1.bits.swizzle, y), + BRW_GET_SWZ(reg.dw1.bits.swizzle, z), + BRW_GET_SWZ(reg.dw1.bits.swizzle, w)); + return reg; +} + + +static inline struct brw_reg +brw_swizzle1(struct brw_reg reg, unsigned x) +{ + return brw_swizzle(reg, x, x, x, x); +} + +static inline struct brw_reg +brw_writemask(struct brw_reg reg, unsigned mask) +{ + assert(reg.file != BRW_IMMEDIATE_VALUE); + reg.dw1.bits.writemask &= mask; + return reg; +} + +static inline struct brw_reg +brw_set_writemask(struct brw_reg reg, unsigned mask) +{ + assert(reg.file != BRW_IMMEDIATE_VALUE); + reg.dw1.bits.writemask = mask; + return reg; +} + +static inline struct brw_reg +negate(struct brw_reg reg) +{ + reg.negate ^= 1; + return reg; +} + +static inline struct brw_reg +brw_abs(struct brw_reg reg) +{ + reg.abs = 1; + reg.negate = 0; + return reg; +} + +/************************************************************************/ + +static inline struct brw_reg +brw_vec4_indirect(unsigned subnr, int offset) +{ + struct brw_reg reg = brw_vec4_grf(0, 0); + reg.subnr = subnr; + reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; + reg.dw1.bits.indirect_offset = offset; + return reg; +} + +static inline struct brw_reg +brw_vec1_indirect(unsigned subnr, int offset) +{ + struct brw_reg reg = brw_vec1_grf(0, 0); + reg.subnr = subnr; + reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; + reg.dw1.bits.indirect_offset = offset; + return reg; +} + +static inline struct brw_reg +deref_4f(struct brw_indirect ptr, int offset) +{ + return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset); +} + +static inline struct brw_reg +deref_1f(struct brw_indirect ptr, int offset) +{ + return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset); +} + +static inline struct brw_reg +deref_4b(struct brw_indirect ptr, int offset) +{ + return retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B); +} + +static inline struct brw_reg +deref_1uw(struct brw_indirect ptr, int offset) +{ + return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW); +} + +static inline struct brw_reg +deref_1d(struct brw_indirect ptr, int offset) +{ + return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_D); +} + +static inline struct brw_reg +deref_1ud(struct brw_indirect ptr, int offset) +{ + return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD); +} + +static inline struct brw_reg +get_addr_reg(struct brw_indirect ptr) +{ + return brw_address_reg(ptr.addr_subnr); +} + +static inline struct brw_indirect +brw_indirect_offset(struct brw_indirect ptr, int offset) +{ + ptr.addr_offset += offset; + return ptr; +} + +static inline struct brw_indirect +brw_indirect(unsigned addr_subnr, int offset) +{ + struct brw_indirect ptr; + ptr.addr_subnr = addr_subnr; + ptr.addr_offset = offset; + ptr.pad = 0; + return ptr; +} + +/** Do two brw_regs refer to the same register? */ +static inline bool +brw_same_reg(struct brw_reg r1, struct brw_reg r2) +{ + return r1.file == r2.file && r1.nr == r2.nr; +} + +void brw_print_reg(struct brw_reg reg); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/assembler/disasm-main.c b/assembler/disasm-main.c index 5cc1e7d..b900e91 100644 --- a/assembler/disasm-main.c +++ b/assembler/disasm-main.c @@ -27,6 +27,7 @@ #include <unistd.h> #include "gen4asm.h" +#include "brw_eu.h" static const struct option longopts[] = { { NULL, 0, NULL, 0 } @@ -95,7 +96,10 @@ read_program_binary (FILE *input) static void usage(void) { - fprintf(stderr, "usage: intel-gen4disasm [-o outputfile] [-b] inputfile\n"); + fprintf(stderr, "usage: intel-gen4disasm [options] inputfile\n"); + fprintf(stderr, "\t-b, --binary C style binary output\n"); + fprintf(stderr, "\t-o, --output {outputfile} Specify output file\n"); + fprintf(stderr, "\t-g, --gen <4|5|6|7> Specify GPU generation\n"); } int main(int argc, char **argv) @@ -107,9 +111,10 @@ int main(int argc, char **argv) char *output_file = NULL; int byte_array_input = 0; int o; + int gen = 4; struct brw_program_instruction *inst; - while ((o = getopt_long(argc, argv, "o:b", longopts, NULL)) != -1) { + while ((o = getopt_long(argc, argv, "o:bg:", longopts, NULL)) != -1) { switch (o) { case 'o': if (strcmp(optarg, "-") != 0) @@ -118,6 +123,15 @@ int main(int argc, char **argv) case 'b': byte_array_input = 1; break; + case 'g': + gen = strtol(optarg, NULL, 10); + + if (gen < 4 || gen > 7) { + usage(); + exit(1); + } + + break; default: usage(); exit(1); @@ -153,6 +167,6 @@ int main(int argc, char **argv) } for (inst = program->first; inst; inst = inst->next) - disasm (output, &inst->instruction); + brw_disasm (output, &inst->instruction, gen); exit (0); } diff --git a/assembler/gen4asm.h b/assembler/gen4asm.h index f9ed161..e47e9e6 100644 --- a/assembler/gen4asm.h +++ b/assembler/gen4asm.h @@ -197,6 +197,3 @@ int yylex_destroy(void); char * lex_text(void); - -int -disasm (FILE *output, struct brw_instruction *inst); -- 1.7.7.5