On Mon, Apr 28, 2014 at 08:22:08AM -0700, Volkin, Bradley D wrote: > From: Brad Volkin <bradley.d.volkin@xxxxxxxxx> > > For clients that submit large batch buffers the command parser has > a substantial impact on performance. On my HSW ULT system performance > drops as much as ~20% on some tests. Most of the time is spent in the > command lookup code. Converting that from the current naive search to > a hash table lookup reduces the performance impact by as much as ~10%. Tvrtko pointed out that what I wrote here is a bit ambiguous. To clarify: Without the patch, perf drops 20% With the patch, perf drops 10% Brad > > The choice of value for I915_CMD_HASH_ORDER allows all commands > currently used in the parser tables to hash to their own bucket (except > for one collision on the render ring). The tradeoff is that it wastes > memory. Because the opcodes for the commands in the tables are not > particularly well distributed, reducing the order still leaves many > buckets empty. The increased collisions don't seem to have a huge > impact on the performance gain, but for now anyhow, the parser trades > memory for performance. > > Signed-off-by: Brad Volkin <bradley.d.volkin@xxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_cmd_parser.c | 136 ++++++++++++++++++++++++-------- > drivers/gpu/drm/i915/i915_drv.h | 1 + > drivers/gpu/drm/i915/intel_ringbuffer.c | 2 + > drivers/gpu/drm/i915/intel_ringbuffer.h | 11 ++- > 4 files changed, 116 insertions(+), 34 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c > index 9bac097..9dca899 100644 > --- a/drivers/gpu/drm/i915/i915_cmd_parser.c > +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c > @@ -498,16 +498,18 @@ static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header) > return 0; > } > > -static bool validate_cmds_sorted(struct intel_ring_buffer *ring) > +static bool validate_cmds_sorted(struct intel_ring_buffer *ring, > + const struct drm_i915_cmd_table *cmd_tables, > + int cmd_table_count) > { > int i; > bool ret = true; > > - if (!ring->cmd_tables || ring->cmd_table_count == 0) > + if (!cmd_tables || cmd_table_count == 0) > return true; > > - for (i = 0; i < ring->cmd_table_count; i++) { > - const struct drm_i915_cmd_table *table = &ring->cmd_tables[i]; > + for (i = 0; i < cmd_table_count; i++) { > + const struct drm_i915_cmd_table *table = &cmd_tables[i]; > u32 previous = 0; > int j; > > @@ -557,6 +559,60 @@ static bool validate_regs_sorted(struct intel_ring_buffer *ring) > ring->master_reg_count); > } > > +struct cmd_node { > + const struct drm_i915_cmd_descriptor *desc; > + struct hlist_node node; > +}; > + > +/* > + * Different command ranges have different numbers of bits for the opcode. > + * In order to use the opcode bits, and only the opcode bits, for the hash key > + * we should use the MI_* command opcode mask (since those commands use the > + * fewest bits for the opcode.) > + */ > +#define CMD_HASH_MASK STD_MI_OPCODE_MASK > + > +static int init_hash_table(struct intel_ring_buffer *ring, > + const struct drm_i915_cmd_table *cmd_tables, > + int cmd_table_count) > +{ > + int i, j; > + > + hash_init(ring->cmd_hash); > + > + for (i = 0; i < cmd_table_count; i++) { > + const struct drm_i915_cmd_table *table = &cmd_tables[i]; > + > + for (j = 0; j < table->count; j++) { > + const struct drm_i915_cmd_descriptor *desc = > + &table->table[j]; > + struct cmd_node *desc_node = > + kmalloc(sizeof(*desc_node), GFP_KERNEL); > + > + if (!desc_node) > + return -ENOMEM; > + > + desc_node->desc = desc; > + hash_add(ring->cmd_hash, &desc_node->node, > + desc->cmd.value & CMD_HASH_MASK); > + } > + } > + > + return 0; > +} > + > +static void fini_hash_table(struct intel_ring_buffer *ring) > +{ > + struct hlist_node *tmp; > + struct cmd_node *desc_node; > + int i; > + > + hash_for_each_safe(ring->cmd_hash, i, tmp, desc_node, node) { > + hash_del(&desc_node->node); > + kfree(desc_node); > + } > +} > + > /** > * i915_cmd_parser_init_ring() - set cmd parser related fields for a ringbuffer > * @ring: the ringbuffer to initialize > @@ -567,18 +623,21 @@ static bool validate_regs_sorted(struct intel_ring_buffer *ring) > */ > void i915_cmd_parser_init_ring(struct intel_ring_buffer *ring) > { > + const struct drm_i915_cmd_table *cmd_tables; > + int cmd_table_count; > + > if (!IS_GEN7(ring->dev)) > return; > > switch (ring->id) { > case RCS: > if (IS_HASWELL(ring->dev)) { > - ring->cmd_tables = hsw_render_ring_cmds; > - ring->cmd_table_count = > + cmd_tables = hsw_render_ring_cmds; > + cmd_table_count = > ARRAY_SIZE(hsw_render_ring_cmds); > } else { > - ring->cmd_tables = gen7_render_cmds; > - ring->cmd_table_count = ARRAY_SIZE(gen7_render_cmds); > + cmd_tables = gen7_render_cmds; > + cmd_table_count = ARRAY_SIZE(gen7_render_cmds); > } > > ring->reg_table = gen7_render_regs; > @@ -595,17 +654,17 @@ void i915_cmd_parser_init_ring(struct intel_ring_buffer *ring) > ring->get_cmd_length_mask = gen7_render_get_cmd_length_mask; > break; > case VCS: > - ring->cmd_tables = gen7_video_cmds; > - ring->cmd_table_count = ARRAY_SIZE(gen7_video_cmds); > + cmd_tables = gen7_video_cmds; > + cmd_table_count = ARRAY_SIZE(gen7_video_cmds); > ring->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask; > break; > case BCS: > if (IS_HASWELL(ring->dev)) { > - ring->cmd_tables = hsw_blt_ring_cmds; > - ring->cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmds); > + cmd_tables = hsw_blt_ring_cmds; > + cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmds); > } else { > - ring->cmd_tables = gen7_blt_cmds; > - ring->cmd_table_count = ARRAY_SIZE(gen7_blt_cmds); > + cmd_tables = gen7_blt_cmds; > + cmd_table_count = ARRAY_SIZE(gen7_blt_cmds); > } > > ring->reg_table = gen7_blt_regs; > @@ -622,8 +681,8 @@ void i915_cmd_parser_init_ring(struct intel_ring_buffer *ring) > ring->get_cmd_length_mask = gen7_blt_get_cmd_length_mask; > break; > case VECS: > - ring->cmd_tables = hsw_vebox_cmds; > - ring->cmd_table_count = ARRAY_SIZE(hsw_vebox_cmds); > + cmd_tables = hsw_vebox_cmds; > + cmd_table_count = ARRAY_SIZE(hsw_vebox_cmds); > /* VECS can use the same length_mask function as VCS */ > ring->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask; > break; > @@ -633,18 +692,38 @@ void i915_cmd_parser_init_ring(struct intel_ring_buffer *ring) > BUG(); > } > > - BUG_ON(!validate_cmds_sorted(ring)); > + BUG_ON(!validate_cmds_sorted(ring, cmd_tables, cmd_table_count)); > BUG_ON(!validate_regs_sorted(ring)); > + > + BUG_ON(init_hash_table(ring, cmd_tables, cmd_table_count)); > + > + ring->needs_cmd_parser = true; > +} > + > +/** > + * i915_cmd_parser_fini_ring() - clean up cmd parser related fields > + * @ring: the ringbuffer to clean up > + * > + * Releases any resources related to command parsing that may have been > + * initialized for the specified ring. > + */ > +void i915_cmd_parser_fini_ring(struct intel_ring_buffer *ring) > +{ > + if (!ring->needs_cmd_parser) > + return; > + > + fini_hash_table(ring); > } > > static const struct drm_i915_cmd_descriptor* > -find_cmd_in_table(const struct drm_i915_cmd_table *table, > +find_cmd_in_table(struct intel_ring_buffer *ring, > u32 cmd_header) > { > - int i; > + struct cmd_node *desc_node; > > - for (i = 0; i < table->count; i++) { > - const struct drm_i915_cmd_descriptor *desc = &table->table[i]; > + hash_for_each_possible(ring->cmd_hash, desc_node, node, > + cmd_header & CMD_HASH_MASK) { > + const struct drm_i915_cmd_descriptor *desc = desc_node->desc; > u32 masked_cmd = desc->cmd.mask & cmd_header; > u32 masked_value = desc->cmd.value & desc->cmd.mask; > > @@ -668,16 +747,12 @@ find_cmd(struct intel_ring_buffer *ring, > u32 cmd_header, > struct drm_i915_cmd_descriptor *default_desc) > { > + const struct drm_i915_cmd_descriptor *desc; > u32 mask; > - int i; > - > - for (i = 0; i < ring->cmd_table_count; i++) { > - const struct drm_i915_cmd_descriptor *desc; > > - desc = find_cmd_in_table(&ring->cmd_tables[i], cmd_header); > - if (desc) > - return desc; > - } > + desc = find_cmd_in_table(ring, cmd_header); > + if (desc) > + return desc; > > mask = ring->get_cmd_length_mask(cmd_header); > if (!mask) > @@ -748,8 +823,7 @@ bool i915_needs_cmd_parser(struct intel_ring_buffer *ring) > { > struct drm_i915_private *dev_priv = ring->dev->dev_private; > > - /* No command tables indicates a platform without parsing */ > - if (!ring->cmd_tables) > + if (!ring->needs_cmd_parser) > return false; > > /* > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 7d6acb4..8c8388f 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -2414,6 +2414,7 @@ const char *i915_cache_level_str(int type); > /* i915_cmd_parser.c */ > int i915_cmd_parser_get_version(void); > void i915_cmd_parser_init_ring(struct intel_ring_buffer *ring); > +void i915_cmd_parser_fini_ring(struct intel_ring_buffer *ring); > bool i915_needs_cmd_parser(struct intel_ring_buffer *ring); > int i915_parse_cmds(struct intel_ring_buffer *ring, > struct drm_i915_gem_object *batch_obj, > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c > index eb3dd26..06fa9a3 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c > @@ -1469,6 +1469,8 @@ void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring) > ring->cleanup(ring); > > cleanup_status_page(ring); > + > + i915_cmd_parser_fini_ring(ring); > } > > static int intel_ring_wait_request(struct intel_ring_buffer *ring, int n) > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h > index 413cdc7..48daa91 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h > @@ -1,6 +1,10 @@ > #ifndef _INTEL_RINGBUFFER_H_ > #define _INTEL_RINGBUFFER_H_ > > +#include <linux/hashtable.h> > + > +#define I915_CMD_HASH_ORDER 9 > + > /* > * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 "Ring Buffer Use" > * Gen3 BSpec "vol1c Memory Interface Functions" / 2.3.4.5 "Ring Buffer Use" > @@ -164,12 +168,13 @@ struct intel_ring_buffer { > volatile u32 *cpu_page; > } scratch; > > + bool needs_cmd_parser; > + > /* > - * Tables of commands the command parser needs to know about > + * Table of commands the command parser needs to know about > * for this ring. > */ > - const struct drm_i915_cmd_table *cmd_tables; > - int cmd_table_count; > + DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER); > > /* > * Table of registers allowed in commands that read/write registers. > -- > 1.8.3.2 > _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx