Quoting Lionel Landwerlin (2019-05-21 15:08:53) > Here we introduce a mechanism by which the execbuf part of the i915 > driver will be able to request that a batch buffer containing the > programming for a particular OA config be created. > > We'll execute these OA configuration buffers right before executing a > set of userspace commands so that a particular user batchbuffer be > executed with a given OA configuration. > > This mechanism essentially allows the userspace driver to go through > several OA configuration without having to open/close the i915/perf > stream. > > Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx> > --- > drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 1 + > drivers/gpu/drm/i915/i915_drv.h | 22 ++- > drivers/gpu/drm/i915/i915_perf.c | 187 ++++++++++++++++--- > 3 files changed, 178 insertions(+), 32 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h > index a34ece53a771..bbcb80cf2a85 100644 > --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h > +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h > @@ -126,6 +126,7 @@ > */ > #define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1) > #define MI_LRI_FORCE_POSTED (1<<12) > +#define MI_LOAD_REGISTER_IMM_MAX_REGS (126) > #define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1) > #define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2) > #define MI_SRM_LRM_GLOBAL_GTT (1<<22) > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 1ad3818d2676..abd564bfa03b 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1274,6 +1274,10 @@ struct i915_oa_config { > struct attribute *attrs[2]; > struct device_attribute sysfs_metric_id; > > + struct drm_i915_gem_object *obj; > + > + struct list_head vma_link; > + > atomic_t ref_count; > }; > > @@ -1856,11 +1860,21 @@ struct drm_i915_private { > struct mutex metrics_lock; > > /* > - * List of dynamic configurations, you 
need to hold > - * dev_priv->perf.metrics_lock to access it. > + * List of dynamic configurations (struct i915_oa_config), you > + * need to hold dev_priv->perf.metrics_lock to access it. > */ > struct idr metrics_idr; > > + /* > + * List of dynamic configurations (struct i915_oa_config) > + * which have an allocated buffer in GGTT for reconfiguration, > + * you need to hold dev_priv->perf.metrics_lock to access it. > + * Elements are added to the list lazily on execbuf (when a > + * particular configuration is requested). The list is freed > + * upon closing the perf stream. > + */ > + struct list_head metrics_buffers; > + > /* > * Lock associated with anything below within this structure > * except exclusive_stream. > @@ -3136,6 +3150,10 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, > void i915_oa_init_reg_state(struct intel_engine_cs *engine, > struct intel_context *ce, > u32 *reg_state); > +int i915_perf_get_oa_config(struct drm_i915_private *i915, > + int metrics_set, > + struct i915_oa_config **out_config, > + struct drm_i915_gem_object **out_obj); > > /* i915_gem_evict.c */ > int __must_check i915_gem_evict_something(struct i915_address_space *vm, > diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c > index 8c7fa7f7014b..7e0ebd4bc8f2 100644 > --- a/drivers/gpu/drm/i915/i915_perf.c > +++ b/drivers/gpu/drm/i915/i915_perf.c > @@ -365,9 +365,16 @@ struct perf_open_properties { > int oa_period_exponent; > }; > > -static void free_oa_config(struct drm_i915_private *dev_priv, > - struct i915_oa_config *oa_config) > +static void put_oa_config(struct i915_oa_config *oa_config) > { > + if (!atomic_dec_and_test(&oa_config->ref_count)) > + return; > + > + if (oa_config->obj) { > + list_del(&oa_config->vma_link); > + i915_gem_object_put(oa_config->obj); > + } > + > if (!PTR_ERR(oa_config->flex_regs)) > kfree(oa_config->flex_regs); > if (!PTR_ERR(oa_config->b_counter_regs)) > @@ -377,38 +384,142 @@ static void 
free_oa_config(struct drm_i915_private *dev_priv, > kfree(oa_config); > } > > -static void put_oa_config(struct drm_i915_private *dev_priv, > - struct i915_oa_config *oa_config) > +static u32 *write_cs_mi_lri(u32 *cs, const struct i915_oa_reg *reg_data, u32 n_regs) > { > - if (!atomic_dec_and_test(&oa_config->ref_count)) > - return; > + u32 i; > + > + for (i = 0; i < n_regs; i++) { > + if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) { > + u32 n_lri = min(n_regs - i, > + (u32) MI_LOAD_REGISTER_IMM_MAX_REGS); > > - free_oa_config(dev_priv, oa_config); > + *cs++ = MI_LOAD_REGISTER_IMM(n_lri); > + } > + *cs++ = i915_mmio_reg_offset(reg_data[i].addr); > + *cs++ = reg_data[i].value; > + } > + > + return cs; > } > > -static int get_oa_config(struct drm_i915_private *dev_priv, > - int metrics_set, > - struct i915_oa_config **out_config) > +static int alloc_oa_config_buffer(struct drm_i915_private *i915, > + struct i915_oa_config *oa_config) > { > + struct drm_i915_gem_object *bo; > + size_t config_length = 0; > int ret; > + u32 *cs; > > - if (metrics_set == 1) { > - *out_config = &dev_priv->perf.oa.test_config; > - atomic_inc(&dev_priv->perf.oa.test_config.ref_count); > - return 0; > + if (oa_config->mux_regs_len > 0) { > + config_length += DIV_ROUND_UP(oa_config->mux_regs_len, > + MI_LOAD_REGISTER_IMM_MAX_REGS) * 4; > + config_length += oa_config->mux_regs_len * 8; > + } > + if (oa_config->b_counter_regs_len > 0) { > + config_length += DIV_ROUND_UP(oa_config->b_counter_regs_len, > + MI_LOAD_REGISTER_IMM_MAX_REGS) * 4; > + config_length += oa_config->b_counter_regs_len * 8; > } > + if (oa_config->flex_regs_len > 0) { > + config_length += DIV_ROUND_UP(oa_config->flex_regs_len, > + MI_LOAD_REGISTER_IMM_MAX_REGS) * 4; > + config_length += oa_config->flex_regs_len * 8; > + } > + config_length += 4; /* MI_BATCH_BUFFER_END */ > + config_length = ALIGN(config_length, I915_GTT_PAGE_SIZE); > > - ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock); > + ret = 
i915_mutex_lock_interruptible(&i915->drm); struct_mutex not required for creating/populating an object. > if (ret) > return ret; > > - *out_config = idr_find(&dev_priv->perf.metrics_idr, metrics_set); > - if (!*out_config) > - ret = -EINVAL; > - else > - atomic_inc(&(*out_config)->ref_count); > + bo = i915_gem_object_create(i915, config_length); > + if (IS_ERR(bo)) { > + ret = PTR_ERR(bo); > + goto unlock; > + } > > - mutex_unlock(&dev_priv->perf.metrics_lock); > + cs = i915_gem_object_pin_map(bo, I915_MAP_WB); > + if (IS_ERR(cs)) { > + ret = PTR_ERR(cs); > + goto err_unref; > + } > + > + memset(cs, 0, config_length); Already zeroed, and write_cs_mi_lri() leaves no holes. > + cs = write_cs_mi_lri(cs, oa_config->mux_regs, oa_config->mux_regs_len); > + cs = write_cs_mi_lri(cs, oa_config->b_counter_regs, oa_config->b_counter_regs_len); > + cs = write_cs_mi_lri(cs, oa_config->flex_regs, oa_config->flex_regs_len); > + > + *cs++ = MI_BATCH_BUFFER_END; > + i915_gem_object_flush_map(bo); > + i915_gem_object_unpin_map(bo); > + > + oa_config->obj = bo; > + > + goto unlock; > + > +err_unref: > + oa_config->obj = NULL; was never set. > + i915_gem_object_put(bo); You could avoid the unconditional jump by just taking the ref in oa_config->obj = i915_gem_object_get(bo); > +unlock: > + mutex_unlock(&i915->drm.struct_mutex); > + return ret; > +} _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx