Quoting Lionel Landwerlin (2019-08-30 15:47:24) > +static int > +get_execbuf_oa_config(struct i915_execbuffer *eb) > +{ > + int err = 0; > + > + eb->perf_file = NULL; > + eb->oa_config = NULL; > + eb->oa_vma = NULL; > + eb->oa_bo = NULL; > + > + if ((eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) == 0) > + return 0; > + > + eb->perf_file = fget(eb->extensions.perf_config.perf_fd); > + if (!eb->perf_file) > + return -EINVAL; > + > + err = i915_mutex_lock_interruptible(&eb->i915->drm); > + if (err) { > + fput(eb->perf_file); > + eb->perf_file = NULL; > + return err; > + } > + > + if (eb->perf_file->private_data != eb->i915->perf.exclusive_stream) > + err = -EINVAL; > + > + mutex_unlock(&eb->i915->drm.struct_mutex); So why can i915->perf.exclusive_stream not change after this point? > + > + if (!err) { > + err = i915_perf_get_oa_config_and_bo( > + eb->i915->perf.exclusive_stream, > + eb->extensions.perf_config.oa_config, > + &eb->oa_config, &eb->oa_bo); > + } > + > + return err; > +} > + > static int __reloc_gpu_alloc(struct i915_execbuffer *eb, > struct i915_vma *vma, > unsigned int len) > @@ -2051,6 +2098,50 @@ add_to_client(struct i915_request *rq, struct drm_file *file) > spin_unlock(&file_priv->mm.lock); > } > > +static int eb_oa_config(struct i915_execbuffer *eb) > +{ > + struct i915_perf_stream *perf_stream; > + int err; > + > + if (!eb->oa_config) > + return 0; > + > + perf_stream = eb->perf_file->private_data; > + > + err = mutex_lock_interruptible(&perf_stream->config_mutex); > + if (err) > + return err; > + > + err = i915_active_request_set(&perf_stream->active_config_rq, > + eb->request); > + if (err) > + goto out; > + > + /* > + * If the config hasn't changed, skip reconfiguring the HW (this is > + * subject to a delay we want to avoid has much as possible). 
> + */ > + if (eb->oa_config == perf_stream->oa_config) > + goto out; > + We need to wait for any exclusive fences in case we migrate this object in future: i915_vma_lock(eb->oa_vma); err = i915_request_await_object(eb->request, eb->oa_vma->obj, false); /* await_resv already! */ if (err == 0) err = i915_vma_move_to_active(eb->oa_vma, eb->request, 0); i915_vma_unlock(eb->oa_vma); Though this raises an interesting point, we do not actually want to emit a semaphore here (albeit the engine is fixed atm) as we are after the point where we have declared all semaphore waits completed. (Not an issue to worry about right now!) > + if (err) > + goto out; > + > + err = eb->engine->emit_bb_start(eb->request, > + eb->oa_vma->node.start, > + 0, I915_DISPATCH_SECURE); > + if (err) > + goto out; > + > + swap(eb->oa_config, perf_stream->oa_config); > + > +out: > + mutex_unlock(&perf_stream->config_mutex); > + > + return err; > +} > + > static int eb_submit(struct i915_execbuffer *eb) > { > int err; > @@ -2077,6 +2168,10 @@ static int eb_submit(struct i915_execbuffer *eb) > return err; > } > > + err = eb_oa_config(eb); > + if (err) > + return err; Ok, definitely needs to be after the waits! 
> + > err = eb->engine->emit_bb_start(eb->request, > eb->batch->node.start + > eb->batch_start_offset, > @@ -2643,8 +2738,25 @@ static int parse_timeline_fences(struct i915_user_extension __user *ext, void *d > return 0; > } > > +static int parse_perf_config(struct i915_user_extension __user *ext, void *data) > +{ > + struct i915_execbuffer *eb = data; > + > + if (eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) > + return -EINVAL; > + > + if (copy_from_user(&eb->extensions.perf_config, ext, > + sizeof(eb->extensions.perf_config))) > + return -EFAULT; > + > + eb->extensions.flags |= BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF); > + > + return 0; > +} > + > static const i915_user_extension_fn execbuf_extensions[] = { > [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences, > + [DRM_I915_GEM_EXECBUFFER_EXT_PERF] = parse_perf_config, > }; > > static int > @@ -2755,10 +2867,14 @@ i915_gem_do_execbuffer(struct drm_device *dev, > } > } > > - err = eb_create(&eb); > + err = get_execbuf_oa_config(&eb); > if (err) > goto err_out_fence; > > + err = eb_create(&eb); > + if (err) > + goto err_oa_config; > + > GEM_BUG_ON(!eb.lut_size); > > err = eb_select_context(&eb); > @@ -2769,6 +2885,16 @@ i915_gem_do_execbuffer(struct drm_device *dev, > if (unlikely(err)) > goto err_context; > > + if (eb.extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) { > + struct i915_perf_stream *perf_stream = > + eb.perf_file->private_data; > + > + if (perf_stream->engine != eb.engine) { > + err = -EINVAL; > + goto err_engine; > + } > + } > + > err = i915_mutex_lock_interruptible(dev); > if (err) > goto err_engine; > @@ -2889,6 +3015,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, > } > } > > + if (eb.extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) { > + eb.oa_vma = i915_vma_instance(eb.oa_bo, > + &eb.engine->i915->ggtt.vm, NULL); eb.engine->gt->ggtt.vm The i915_vma_instance() can be created outside of the struct_mutex, and once we have the 
oa_vma, we don't need to keep an oa_bo pointer. Later we can do the i915_vma_pin() before struct_mutex as well. Thanks, that's looking a lot better with regard to the plan to eliminate struct_mutex. -Chris _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx