Quoting Mika Kuoppala (2019-09-24 11:21:38)
> Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes:
> > +static u32 *set_offsets(u32 *regs,
> > +			const u8 *data,
> > +			const struct intel_engine_cs *engine)
> > +#define NOP(x) (BIT(7) | (x))
> > +#define LRI(count, flags) ((flags) << 6 | (count))
> > +#define POSTED BIT(0)
> > +#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
> > +#define REG16(x) \
> > +	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
> > +	(((x) >> 2) & 0x7f)
>
> I am still not sure if the actual savings are worth the complexity.
>
> > +#define END() 0
> > +{
> > +	const u32 base = engine->mmio_base;
> > +
> > +	while (*data) {
> > +		u8 count, flags;
> > +
> > +		if (*data & BIT(7)) { /* skip */
> > +			regs += *data++ & ~BIT(7);
> > +			continue;
> > +		}
> > +
> > +		count = *data & 0x3f;
> > +		flags = *data >> 6;
> > +		data++;
> > +
> > +		*regs = MI_LOAD_REGISTER_IMM(count);
> > +		if (flags & POSTED)
> > +			*regs |= MI_LRI_FORCE_POSTED;
> > +		if (INTEL_GEN(engine->i915) >= 11)
> > +			*regs |= MI_LRI_CS_MMIO;
> > +		regs++;
> > +
> > +		GEM_BUG_ON(!count);
> > +		do {
> > +			u32 offset = 0;
> > +			u8 v;
> > +
> > +			do {
> > +				v = *data++;
> > +				offset <<= 7;
> > +				offset |= v & ~BIT(7);
> > +			} while (v & BIT(7));
>
> ...but perhaps this amount of extra complexity can be tolerated.
>
> Did you check how this would play out with just REG being wide enough?

Function                 old     new   delta
gen9_xcs_offsets         122     145     +23
gen12_xcs_offsets        136     157     +21
gen11_rcs_offsets         44      60     +16
gen8_rcs_offsets          41      55     +14
gen12_rcs_offsets         47      60     +13
gen8_xcs_offsets          40      51     +11
set_offsets.isra         215     179     -36

-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx