Re: [PATCH 3/5] drm/i915/perf: allow for CS OA configs to be created lazily

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 21/05/2019 17:43, Chris Wilson wrote:
Quoting Lionel Landwerlin (2019-05-21 15:08:53)
Here we introduce a mechanism by which the execbuf part of the i915
driver will be able to request that a batch buffer containing the
programming for a particular OA config be created.

We'll execute these OA configuration buffers right before executing a
set of userspace commands so that a particular user batchbuffer is
executed with a given OA configuration.

This mechanism essentially allows the userspace driver to go through
several OA configurations without having to open/close the i915/perf
stream.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx>
---
  drivers/gpu/drm/i915/gt/intel_gpu_commands.h |   1 +
  drivers/gpu/drm/i915/i915_drv.h              |  22 ++-
  drivers/gpu/drm/i915/i915_perf.c             | 187 ++++++++++++++++---
  3 files changed, 178 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index a34ece53a771..bbcb80cf2a85 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -126,6 +126,7 @@
   */
  #define MI_LOAD_REGISTER_IMM(x)        MI_INSTR(0x22, 2*(x)-1)
  #define   MI_LRI_FORCE_POSTED          (1<<12)
+#define MI_LOAD_REGISTER_IMM_MAX_REGS (126)
  #define MI_STORE_REGISTER_MEM        MI_INSTR(0x24, 1)
  #define MI_STORE_REGISTER_MEM_GEN8   MI_INSTR(0x24, 2)
  #define   MI_SRM_LRM_GLOBAL_GTT                (1<<22)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 1ad3818d2676..abd564bfa03b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1274,6 +1274,10 @@ struct i915_oa_config {
         struct attribute *attrs[2];
         struct device_attribute sysfs_metric_id;
+ struct drm_i915_gem_object *obj;
+
+       struct list_head vma_link;
+
         atomic_t ref_count;
  };
@@ -1856,11 +1860,21 @@ struct drm_i915_private {
                 struct mutex metrics_lock;
/*
-                * List of dynamic configurations, you need to hold
-                * dev_priv->perf.metrics_lock to access it.
+                * List of dynamic configurations (struct i915_oa_config), you
+                * need to hold dev_priv->perf.metrics_lock to access it.
                  */
                 struct idr metrics_idr;
+ /*
+                * List of dynamic configurations (struct i915_oa_config)
+                * which have an allocated buffer in GGTT for reconfiguration,
+                * you need to hold dev_priv->perf.metrics_lock to access it.
+                * Elements are added to the list lazily on execbuf (when a
+                * particular configuration is requested). The list is freed
+                * upon closing the perf stream.
+                */
+               struct list_head metrics_buffers;
+
                 /*
                  * Lock associated with anything below within this structure
                  * except exclusive_stream.
@@ -3136,6 +3150,10 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
  void i915_oa_init_reg_state(struct intel_engine_cs *engine,
                             struct intel_context *ce,
                             u32 *reg_state);
+int i915_perf_get_oa_config(struct drm_i915_private *i915,
+                           int metrics_set,
+                           struct i915_oa_config **out_config,
+                           struct drm_i915_gem_object **out_obj);
/* i915_gem_evict.c */
  int __must_check i915_gem_evict_something(struct i915_address_space *vm,
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 8c7fa7f7014b..7e0ebd4bc8f2 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -365,9 +365,16 @@ struct perf_open_properties {
         int oa_period_exponent;
  };
-static void free_oa_config(struct drm_i915_private *dev_priv,
-                          struct i915_oa_config *oa_config)
+static void put_oa_config(struct i915_oa_config *oa_config)
  {
+       if (!atomic_dec_and_test(&oa_config->ref_count))
+               return;
+
+       if (oa_config->obj) {
+               list_del(&oa_config->vma_link);
+               i915_gem_object_put(oa_config->obj);
+       }
+
         if (!PTR_ERR(oa_config->flex_regs))
                 kfree(oa_config->flex_regs);
         if (!PTR_ERR(oa_config->b_counter_regs))
@@ -377,38 +384,142 @@ static void free_oa_config(struct drm_i915_private *dev_priv,
         kfree(oa_config);
  }
-static void put_oa_config(struct drm_i915_private *dev_priv,
-                         struct i915_oa_config *oa_config)
+static u32 *write_cs_mi_lri(u32 *cs, const struct i915_oa_reg *reg_data, u32 n_regs)
  {
-       if (!atomic_dec_and_test(&oa_config->ref_count))
-               return;
+       u32 i;
+
+       for (i = 0; i < n_regs; i++) {
+               if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) {
+                       u32 n_lri = min(n_regs - i,
+                                       (u32) MI_LOAD_REGISTER_IMM_MAX_REGS);
- free_oa_config(dev_priv, oa_config);
+                       *cs++ = MI_LOAD_REGISTER_IMM(n_lri);
+               }
+               *cs++ = i915_mmio_reg_offset(reg_data[i].addr);
+               *cs++ = reg_data[i].value;
+       }
+
+       return cs;
  }
-static int get_oa_config(struct drm_i915_private *dev_priv,
-                        int metrics_set,
-                        struct i915_oa_config **out_config)
+static int alloc_oa_config_buffer(struct drm_i915_private *i915,
+                                 struct i915_oa_config *oa_config)
  {
+       struct drm_i915_gem_object *bo;
+       size_t config_length = 0;
         int ret;
+       u32 *cs;
- if (metrics_set == 1) {
-               *out_config = &dev_priv->perf.oa.test_config;
-               atomic_inc(&dev_priv->perf.oa.test_config.ref_count);
-               return 0;
+       if (oa_config->mux_regs_len > 0) {
+               config_length += DIV_ROUND_UP(oa_config->mux_regs_len,
+                                             MI_LOAD_REGISTER_IMM_MAX_REGS) * 4;
+               config_length += oa_config->mux_regs_len * 8;
+       }
+       if (oa_config->b_counter_regs_len > 0) {
+               config_length += DIV_ROUND_UP(oa_config->b_counter_regs_len,
+                                             MI_LOAD_REGISTER_IMM_MAX_REGS) * 4;
+               config_length += oa_config->b_counter_regs_len * 8;
         }
+       if (oa_config->flex_regs_len > 0) {
+               config_length += DIV_ROUND_UP(oa_config->flex_regs_len,
+                                             MI_LOAD_REGISTER_IMM_MAX_REGS) * 4;
+               config_length += oa_config->flex_regs_len * 8;
+       }
+       config_length += 4; /* MI_BATCH_BUFFER_END */
+       config_length = ALIGN(config_length, I915_GTT_PAGE_SIZE);
- ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock);
+       ret = i915_mutex_lock_interruptible(&i915->drm);
struct_mutex is not required for creating/populating an object.


Oh nice! I'll clean this up.

Thanks!



         if (ret)
                 return ret;
- *out_config = idr_find(&dev_priv->perf.metrics_idr, metrics_set);
-       if (!*out_config)
-               ret = -EINVAL;
-       else
-               atomic_inc(&(*out_config)->ref_count);
+       bo = i915_gem_object_create(i915, config_length);
+       if (IS_ERR(bo)) {
+               ret = PTR_ERR(bo);
+               goto unlock;
+       }
- mutex_unlock(&dev_priv->perf.metrics_lock);
+       cs = i915_gem_object_pin_map(bo, I915_MAP_WB);
+       if (IS_ERR(cs)) {
+               ret = PTR_ERR(cs);
+               goto err_unref;
+       }
+
+       memset(cs, 0, config_length);
Already zeroed, and write_cs_mi_lri() leaves no holes.

+       cs = write_cs_mi_lri(cs, oa_config->mux_regs, oa_config->mux_regs_len);
+       cs = write_cs_mi_lri(cs, oa_config->b_counter_regs, oa_config->b_counter_regs_len);
+       cs = write_cs_mi_lri(cs, oa_config->flex_regs, oa_config->flex_regs_len);
+
+       *cs++ = MI_BATCH_BUFFER_END;
+
i915_gem_object_flush_map(bo);

+       i915_gem_object_unpin_map(bo);
+
+       oa_config->obj = bo;
+
+       goto unlock;
+
+err_unref:
+       oa_config->obj = NULL;
was never set.

+       i915_gem_object_put(bo);
You could avoid the unconditional jump by just taking the ref in
oa_config->obj = i915_gem_object_get(bo);

+unlock:
+       mutex_unlock(&i915->drm.struct_mutex);
+       return ret;
+}


_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [AMD Graphics]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux