From: Sourab Gupta <sourab.gupta@xxxxxxxxx> This patch adds support for opening multiple concurrent perf streams for different gpu engines, while having the restriction to open only a single stream open for a particular gpu engine. This enables userspace client to open multiple streams, one per engine, at any time to capture sample data for multiple gpu engines. Signed-off-by: Sourab Gupta <sourab.gupta@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_perf.c | 69 ++++++++++++++++++++++------------------ 2 files changed, 39 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a9a123b..9ccac83 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2161,7 +2161,7 @@ struct drm_i915_private { spinlock_t hook_lock; struct hrtimer poll_check_timer; - struct i915_perf_stream *exclusive_stream; + struct i915_perf_stream *ring_stream[I915_NUM_ENGINES]; wait_queue_head_t poll_wq[I915_NUM_ENGINES]; struct { diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 65b4af6..aa3589e 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -944,7 +944,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * an invalid ID. It could be good to annotate these * reports with a _CTX_SWITCH_AWAY reason later. */ - if (!dev_priv->perf.exclusive_stream->ctx || + if (!stream->ctx || dev_priv->perf.oa.specific_ctx_id == ctx_id || dev_priv->perf.oa.oa_buffer.last_ctx_id == ctx_id) { @@ -955,7 +955,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * the switch-away reports with an invalid * context id to be recognisable by userspace. */ - if (dev_priv->perf.exclusive_stream->ctx && + if (stream->ctx && dev_priv->perf.oa.specific_ctx_id != ctx_id) report32[2] = 0x1fffff; @@ -1596,7 +1596,7 @@ static void i915_ring_stream_destroy(struct i915_perf_stream *stream) { struct drm_i915_private *dev_priv = stream->dev_priv; - BUG_ON(stream != dev_priv->perf.exclusive_stream); + BUG_ON(stream != dev_priv->perf.ring_stream[stream->engine]); if (stream->using_oa) { dev_priv->perf.oa.ops.disable_metric_set(dev_priv); @@ -1610,7 +1610,7 @@ static void i915_ring_stream_destroy(struct i915_perf_stream *stream) if (stream->cs_mode) free_command_stream_buf(dev_priv, stream->engine); - dev_priv->perf.exclusive_stream = NULL; + dev_priv->perf.ring_stream[stream->engine] = NULL; } static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv) @@ -2012,14 +2012,14 @@ static void gen7_update_oacontrol_locked(struct drm_i915_private *dev_priv) { assert_spin_locked(&dev_priv->perf.hook_lock); - if (dev_priv->perf.exclusive_stream->state != + if (dev_priv->perf.ring_stream[RCS]->state != I915_PERF_STREAM_DISABLED) { unsigned long ctx_id = 0; - if (dev_priv->perf.exclusive_stream->ctx) + if (dev_priv->perf.ring_stream[RCS]->ctx) ctx_id = dev_priv->perf.oa.specific_ctx_id; - if (dev_priv->perf.exclusive_stream->ctx == NULL || ctx_id) { + if (dev_priv->perf.ring_stream[RCS]->ctx == NULL || ctx_id) { bool periodic = dev_priv->perf.oa.periodic; u32 period_exponent = dev_priv->perf.oa.period_exponent; u32 report_format = dev_priv->perf.oa.oa_buffer.format; @@ -2144,15 +2144,6 @@ static int i915_ring_stream_init(struct i915_perf_stream *stream, SAMPLE_TS); int ret; - /* To avoid the complexity of having to accurately filter - * counter reports and marshal to the appropriate client - * we currently only allow exclusive access - */ - if (dev_priv->perf.exclusive_stream) { - DRM_ERROR("Stream already in use\n"); - return -EBUSY; - } - if ((props->sample_flags & SAMPLE_CTX_ID) && !props->cs_mode) { if (IS_HASWELL(dev_priv->dev)) { DRM_ERROR( @@ -2170,6 +2161,12 @@ static int i915_ring_stream_init(struct i915_perf_stream *stream, if (require_oa_unit) { int format_size; + /* Only allow exclusive access per stream */ + if (dev_priv->perf.ring_stream[RCS]) { + DRM_ERROR("Stream:0 already in use\n"); + return -EBUSY; + } + if (!dev_priv->perf.oa.ops.init_oa_buffer) { DRM_ERROR("OA unit not supported\n"); return -ENODEV; @@ -2305,6 +2302,13 @@ static int i915_ring_stream_init(struct i915_perf_stream *stream, } if (props->cs_mode) { + /* Only allow exclusive access per stream */ + if (dev_priv->perf.ring_stream[props->engine]) { + DRM_ERROR("Stream:%d already in use\n", props->engine); + ret = -EBUSY; + goto cs_error; + } + if (!cs_sample_data) { DRM_ERROR( "Ring given without requesting any CS data to sample"); @@ -2345,7 +2349,7 @@ static int i915_ring_stream_init(struct i915_perf_stream *stream, stream->ops = &i915_oa_stream_ops; - dev_priv->perf.exclusive_stream = stream; + dev_priv->perf.ring_stream[stream->engine] = stream; return 0; @@ -2379,8 +2383,8 @@ static void i915_oa_context_pin_notify_locked(struct drm_i915_private *dev_priv, dev_priv->perf.oa.ops.update_hw_ctx_id_locked == NULL) return; - if (dev_priv->perf.exclusive_stream && - dev_priv->perf.exclusive_stream->ctx == context) { + if (dev_priv->perf.ring_stream[RCS] && + dev_priv->perf.ring_stream[RCS]->ctx == context) { struct drm_i915_gem_object *obj = context->legacy_hw_ctx.rcs_state; u32 ctx_id = i915_gem_obj_ggtt_offset(obj); @@ -2449,8 +2453,8 @@ void i915_oa_legacy_ctx_switch_notify(struct drm_i915_gem_request *req) if (dev_priv->perf.oa.ops.legacy_ctx_switch_unlocked == NULL) return; - if (dev_priv->perf.exclusive_stream && - dev_priv->perf.exclusive_stream->state != + if (dev_priv->perf.ring_stream[RCS] && + dev_priv->perf.ring_stream[RCS]->state != I915_PERF_STREAM_DISABLED) { /* XXX: We don't take a lock here and this may run @@ -2591,23 +2595,26 @@ static ssize_t i915_perf_read(struct file *file, return ret; } -static enum hrtimer_restart poll_check_timer_cb(struct hrtimer *hrtimer) +static void wake_up_perf_streams(void *data, async_cookie_t cookie) { + struct drm_i915_private *dev_priv = data; struct i915_perf_stream *stream; - struct drm_i915_private *dev_priv = - container_of(hrtimer, typeof(*dev_priv), - perf.poll_check_timer); - - /* No need to protect the streams list here, since the hrtimer is - * disabled before the stream is removed from list, and currently a - * single exclusive_stream is supported. - * XXX: revisit this when multiple concurrent streams are supported. - */ + mutex_lock(&dev_priv->perf.streams_lock); list_for_each_entry(stream, &dev_priv->perf.streams, link) { if (stream_have_data__unlocked(stream)) wake_up(&dev_priv->perf.poll_wq[stream->engine]); } + mutex_unlock(&dev_priv->perf.streams_lock); +} + +static enum hrtimer_restart poll_check_timer_cb(struct hrtimer *hrtimer) +{ + struct drm_i915_private *dev_priv = + container_of(hrtimer, typeof(*dev_priv), + perf.poll_check_timer); + + async_schedule(wake_up_perf_streams, dev_priv); hrtimer_forward_now(hrtimer, ns_to_ktime(POLL_PERIOD)); -- 1.9.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx