On Fri, Nov 20, 2015 at 10:55:57AM +0000, Chris Wilson wrote: > The cmd parser has the biggest impact on the BLT ring, because it is > relatively verbose composed to the other engines as the vertex data is > inline. It also typically has runs of repeating commands (again since > the vertex data is inline, it typically has sequences of XY_SETUP_BLT, > XY_SCANLINE_BLT..) We can easily reduce the impact of cmdparsing on > benchmarks by then caching the last descriptor and comparing it against > the next command header. To get maximum benefit, we also want to take > advantage of skipping a few validations and length determinations if the > header is unchanged between commands. > > ivb i7-3720QM: > x11perf -dot: before 52.3M, after 124M (max 203M) > glxgears: before 7310 fps, after 7550 fps (max 7860 fps) > > v2: Fix initial cached cmd descriptor to match MI_NOOP. > > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_cmd_parser.c | 133 +++++++++++++++------------------ > drivers/gpu/drm/i915/i915_drv.h | 10 +-- > 2 files changed, 64 insertions(+), 79 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c > index db3a04ea036a..c6f6d9f2b2ce 100644 > --- a/drivers/gpu/drm/i915/i915_cmd_parser.c > +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c > @@ -794,6 +794,9 @@ void i915_cmd_parser_fini_ring(struct intel_engine_cs *ring) > fini_hash_table(ring); > } > > +/* > + * Returns a pointer to a descriptor for the command specified by cmd_header. > + */ > static const struct drm_i915_cmd_descriptor* > find_cmd_in_table(struct intel_engine_cs *ring, > u32 cmd_header) > @@ -813,37 +816,6 @@ find_cmd_in_table(struct intel_engine_cs *ring, > return NULL; > } > > -/* > - * Returns a pointer to a descriptor for the command specified by cmd_header. > - * > - * The caller must supply space for a default descriptor via the default_desc > - * parameter. If no descriptor for the specified command exists in the ring's > - * command parser tables, this function fills in default_desc based on the > - * ring's default length encoding and returns default_desc. > - */ > -static const struct drm_i915_cmd_descriptor* > -find_cmd(struct intel_engine_cs *ring, > - u32 cmd_header, > - struct drm_i915_cmd_descriptor *default_desc) > -{ > - const struct drm_i915_cmd_descriptor *desc; > - u32 mask; > - > - desc = find_cmd_in_table(ring, cmd_header); > - if (desc) > - return desc; > - > - mask = ring->get_cmd_length_mask(cmd_header); > - if (!mask) > - return NULL; > - > - BUG_ON(!default_desc); > - default_desc->flags = CMD_DESC_SKIP; > - default_desc->length.mask = mask; > - > - return default_desc; > -} > - > static const struct drm_i915_reg_descriptor * > find_reg(const struct drm_i915_reg_descriptor *table, > int count, u32 addr) > @@ -886,17 +858,6 @@ static bool check_cmd(const struct intel_engine_cs *ring, > const bool is_master, > bool *oacontrol_set) > { > - if (desc->flags & CMD_DESC_REJECT) { > - DRM_DEBUG_DRIVER("CMD: Rejected command: 0x%08X\n", *cmd); > - return false; > - } > - > - if ((desc->flags & CMD_DESC_MASTER) && !is_master) { > - DRM_DEBUG_DRIVER("CMD: Rejected master-only command: 0x%08X\n", > - *cmd); > - return false; > - } > - > if (desc->flags & CMD_DESC_REGISTER) { > /* > * Get the distance between individual register offset > @@ -1027,14 +988,15 @@ int i915_parse_cmds(struct intel_engine_cs *ring, > u32 batch_len, > bool is_master) > { > - const struct drm_i915_cmd_descriptor *desc; > + struct drm_i915_cmd_descriptor default_desc = { CMD_DESC_SKIP }; > + const struct drm_i915_cmd_descriptor *desc = &default_desc; > + u32 last_cmd_header = 0; > unsigned dst_iter, src_iter; > int needs_clflush = 0; > struct get_page rewind; > void *src, *dst, *tmp; > - u32 partial, length; > + u32 partial, length = 1; > unsigned in, out; > - struct drm_i915_cmd_descriptor default_desc = { 0 }; > bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */ > int ret = 0; > > @@ -1100,40 +1062,63 @@ int i915_parse_cmds(struct intel_engine_cs *ring, > partial = 0; > > do { > - if (*cmd == MI_BATCH_BUFFER_END) { > - if (oacontrol_set) { > - DRM_DEBUG_DRIVER("CMD: batch set OACONTROL but did not clear it\n"); > - goto unmap; > + if (*cmd != last_cmd_header) { > + if (*cmd == MI_BATCH_BUFFER_END) { > + if (unlikely(oacontrol_set)) { > + DRM_DEBUG_DRIVER("CMD: batch set OACONTROL but did not clear it\n"); > + goto unmap; > + } > + > + cmd++; /* copy this cmd to dst */ > + batch_len = this; /* no more src */ > + ret = 0; > + break; > } > > - cmd++; /* copy this cmd to dst */ > - batch_len = this; /* no more src */ > - ret = 0; > - break; > - } > - > - desc = find_cmd(ring, *cmd, &default_desc); > - if (!desc) { > - DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n", > - *cmd); > - goto unmap; > - } > + desc = find_cmd_in_table(ring, *cmd); > + if (desc) { > + if (unlikely(desc->flags & CMD_DESC_REJECT)) { > + DRM_DEBUG_DRIVER("CMD: Rejected command: 0x%08X\n", *cmd); > + goto unmap; > + } Yikes. More indenttion. It's getting really deep here. Any sane way to reduce it? > + > + if (unlikely((desc->flags & CMD_DESC_MASTER) && !is_master)) { > + DRM_DEBUG_DRIVER("CMD: Rejected master-only command: 0x%08X\n", *cmd); > + goto unmap; > + } > + > + /* > + * If the batch buffer contains a > + * chained batch, return an error that > + * tells the caller to abort and > + * dispatch the workload as a > + * non-secure batch. > + */ > + if (unlikely(desc->cmd.value == MI_BATCH_BUFFER_START)) { > + ret = -EACCES; > + goto unmap; > + } > + > + if (desc->flags & CMD_DESC_FIXED) > + length = desc->length.fixed; > + else > + length = (*cmd & desc->length.mask) + LENGTH_BIAS; > + } else { > + u32 mask = ring->get_cmd_length_mask(*cmd); > + if (unlikely(!mask)) { > + DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n", *cmd); > + goto unmap; > + } > + > + default_desc.length.mask = mask; > + desc = &default_desc; > + > + length = (*cmd & mask) + LENGTH_BIAS; > + } > > - /* > - * If the batch buffer contains a chained batch, return an > - * error that tells the caller to abort and dispatch the > - * workload as a non-secure batch. > - */ > - if (desc->cmd.value == MI_BATCH_BUFFER_START) { > - ret = -EACCES; > - goto unmap; > + last_cmd_header = *cmd; > } > > - if (desc->flags & CMD_DESC_FIXED) > - length = desc->length.fixed; > - else > - length = ((*cmd & desc->length.mask) + LENGTH_BIAS); > - > if (unlikely(cmd + length > page_end)) { > if (unlikely(cmd + length > batch_end)) { > DRM_DEBUG_DRIVER("CMD: Command length exceeds batch length: 0x%08X length=%u batchlen=%td\n", > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 8d939649b919..28d5bfceae3b 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -2333,12 +2333,12 @@ struct drm_i915_cmd_descriptor { > * is the DRM master > */ > u32 flags; > +#define CMD_DESC_SKIP (0) > #define CMD_DESC_FIXED (1<<0) > -#define CMD_DESC_SKIP (1<<1) > -#define CMD_DESC_REJECT (1<<2) > -#define CMD_DESC_REGISTER (1<<3) > -#define CMD_DESC_BITMASK (1<<4) > -#define CMD_DESC_MASTER (1<<5) > +#define CMD_DESC_REJECT (1<<1) > +#define CMD_DESC_REGISTER (1<<2) > +#define CMD_DESC_BITMASK (1<<3) > +#define CMD_DESC_MASTER (1<<4) > > /* > * The command's unique identification bits and the bitmask to get them. > -- > 2.6.2 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Ville Syrjälä Intel OTC _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx