To allow opening CS perf streams for other engines, this patch associates an exclusive stream with each engine and moves the related state into per-stream fields accordingly. Signed-off-by: Sagar Arun Kamble <sagar.a.kamble@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_drv.h | 26 +++-- drivers/gpu/drm/i915/i915_perf.c | 166 +++++++++++++++----------------- drivers/gpu/drm/i915/intel_engine_cs.c | 4 + drivers/gpu/drm/i915/intel_ringbuffer.c | 2 + drivers/gpu/drm/i915/intel_ringbuffer.h | 8 ++ 5 files changed, 102 insertions(+), 104 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 9989f01..554f84fb 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2050,6 +2050,11 @@ struct i915_perf_stream { */ struct drm_i915_private *dev_priv; + /* + * @engine: Engine to which this stream corresponds. + */ + struct intel_engine_cs *engine; + /** * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*` * properties given when opening a stream, representing the contents @@ -2114,6 +2119,10 @@ struct i915_perf_stream { u32 last_ctx_id; u64 last_pid; u32 last_tag; + + u32 specific_ctx_id; + wait_queue_head_t poll_wq; + bool pollin; }; /** @@ -2601,6 +2610,8 @@ struct drm_i915_private { */ struct idr metrics_idr; + struct hrtimer poll_check_timer; + /* * Lock associated with anything below within this structure * except exclusive_stream. @@ -2608,21 +2619,6 @@ struct drm_i915_private { struct mutex lock; struct { - /* - * The stream currently using the OA unit. If accessed - * outside a syscall associated to its file - * descriptor, you need to hold - * dev_priv->drm.struct_mutex. 
- */ - struct i915_perf_stream *exclusive_stream; - struct srcu_struct srcu; - - u32 specific_ctx_id; - - struct hrtimer poll_check_timer; - wait_queue_head_t poll_wq; - bool pollin; - /** * For rate limiting any notifications of spurious * invalid OA reports diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 988132d..b6bd730 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -551,20 +551,20 @@ void i915_perf_emit_sample_capture(struct drm_i915_gem_request *request, u32 tag) { struct drm_i915_private *dev_priv = request->i915; + struct intel_engine_cs *engine = request->engine; struct i915_perf_stream *stream; int idx; if (!dev_priv->perf.initialized) return; - idx = srcu_read_lock(&dev_priv->perf.oa.srcu); - stream = srcu_dereference(dev_priv->perf.oa.exclusive_stream, - &dev_priv->perf.oa.srcu); + idx = srcu_read_lock(&engine->srcu); + stream = srcu_dereference(engine->exclusive_stream, + &engine->srcu); if (stream && stream->enabled && stream->cs_mode) stream->ops->emit_sample_capture(stream, request, preallocate, tag); - - srcu_read_unlock(&dev_priv->perf.oa.srcu, idx); + srcu_read_unlock(&engine->srcu, idx); } /** @@ -663,18 +663,19 @@ static void i915_perf_stream_patch_request(struct i915_perf_stream *stream, void i915_perf_patch_request(struct drm_i915_gem_request *request) { struct drm_i915_private *dev_priv = request->i915; + struct intel_engine_cs *engine = request->engine; struct i915_perf_stream *stream; int idx; if (!dev_priv->perf.initialized) return; - idx = srcu_read_lock(&dev_priv->perf.oa.srcu); - stream = srcu_dereference(dev_priv->perf.oa.exclusive_stream, - &dev_priv->perf.oa.srcu); + idx = srcu_read_lock(&engine->srcu); + stream = srcu_dereference(engine->exclusive_stream, + &engine->srcu); if (stream && stream->enabled && stream->cs_mode) stream->ops->patch_request(stream, request); - srcu_read_unlock(&dev_priv->perf.oa.srcu, idx); + srcu_read_unlock(&engine->srcu, idx); 
} /** @@ -1198,18 +1199,18 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * switches since it's not-uncommon for periodic samples to * identify a switch before any 'context switch' report. */ - if (!dev_priv->perf.oa.exclusive_stream->ctx || - dev_priv->perf.oa.specific_ctx_id == ctx_id || + if (!stream->ctx || + stream->specific_ctx_id == ctx_id || (dev_priv->perf.oa.oa_buffer.last_ctx_id == - dev_priv->perf.oa.specific_ctx_id) || + stream->specific_ctx_id) || reason & OAREPORT_REASON_CTX_SWITCH) { /* * While filtering for a single context we avoid * leaking the IDs of other contexts. */ - if (dev_priv->perf.oa.exclusive_stream->ctx && - dev_priv->perf.oa.specific_ctx_id != ctx_id) { + if (stream->ctx && + stream->specific_ctx_id != ctx_id) { report32[2] = INVALID_CTX_ID; } @@ -1852,7 +1853,7 @@ static int i915_perf_stream_wait_unlocked(struct i915_perf_stream *stream) } } - return wait_event_interruptible(dev_priv->perf.oa.poll_wq, + return wait_event_interruptible(stream->poll_wq, stream_have_data_unlocked(stream)); } @@ -1871,9 +1872,7 @@ static void i915_perf_stream_poll_wait(struct i915_perf_stream *stream, struct file *file, poll_table *wait) { - struct drm_i915_private *dev_priv = stream->dev_priv; - - poll_wait(file, &dev_priv->perf.oa.poll_wq, wait); + poll_wait(file, &stream->poll_wq, wait); } /** @@ -1921,7 +1920,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) struct drm_i915_private *dev_priv = stream->dev_priv; if (i915.enable_execlists) - dev_priv->perf.oa.specific_ctx_id = stream->ctx->hw_id; + stream->specific_ctx_id = stream->ctx->hw_id; else { struct intel_engine_cs *engine = dev_priv->engine[RCS]; struct intel_ring *ring; @@ -1948,7 +1947,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) * i915_ggtt_offset() on the fly) considering the difference * with gen8+ and execlists */ - dev_priv->perf.oa.specific_ctx_id = + stream->specific_ctx_id = 
i915_ggtt_offset(stream->ctx->engine[engine->id].state); } @@ -1967,13 +1966,13 @@ static void oa_put_render_ctx_id(struct i915_perf_stream *stream) struct drm_i915_private *dev_priv = stream->dev_priv; if (i915.enable_execlists) { - dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID; + stream->specific_ctx_id = INVALID_CTX_ID; } else { struct intel_engine_cs *engine = dev_priv->engine[RCS]; mutex_lock(&dev_priv->drm.struct_mutex); - dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID; + stream->specific_ctx_id = INVALID_CTX_ID; engine->context_unpin(engine, stream->ctx); mutex_unlock(&dev_priv->drm.struct_mutex); @@ -2035,23 +2034,24 @@ static void free_perf_samples(struct i915_perf_stream *stream) static void i915_perf_stream_destroy(struct i915_perf_stream *stream) { struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_engine_cs *engine = stream->engine; struct i915_perf_stream *engine_stream; int idx; - idx = srcu_read_lock(&dev_priv->perf.oa.srcu); - engine_stream = srcu_dereference(dev_priv->perf.oa.exclusive_stream, - &dev_priv->perf.oa.srcu); + idx = srcu_read_lock(&engine->srcu); + engine_stream = srcu_dereference(engine->exclusive_stream, + &engine->srcu); if (WARN_ON(stream != engine_stream)) return; - srcu_read_unlock(&dev_priv->perf.oa.srcu, idx); + srcu_read_unlock(&engine->srcu, idx); /* * Unset exclusive_stream first, it will be checked while disabling * the metric set on gen8+. */ mutex_lock(&dev_priv->drm.struct_mutex); - rcu_assign_pointer(dev_priv->perf.oa.exclusive_stream, NULL); - synchronize_srcu(&dev_priv->perf.oa.srcu); + rcu_assign_pointer(engine->exclusive_stream, NULL); + synchronize_srcu(&engine->srcu); mutex_unlock(&dev_priv->drm.struct_mutex); if (stream->using_oa) { @@ -2119,11 +2119,6 @@ static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv) * memory... 
*/ memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE); - - /* Maybe make ->pollin per-stream state if we support multiple - * concurrent streams in the future. - */ - dev_priv->perf.oa.pollin = false; } static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv) @@ -2177,12 +2172,6 @@ static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv) * memory... */ memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE); - - /* - * Maybe make ->pollin per-stream state if we support multiple - * concurrent streams in the future. - */ - dev_priv->perf.oa.pollin = false; } static int alloc_obj(struct drm_i915_private *dev_priv, @@ -2721,6 +2710,7 @@ static void gen8_disable_metric_set(struct drm_i915_private *dev_priv) static void gen7_oa_enable(struct drm_i915_private *dev_priv) { struct i915_perf_stream *stream; + struct intel_engine_cs *engine = dev_priv->engine[RCS]; int idx; /* @@ -2734,13 +2724,12 @@ static void gen7_oa_enable(struct drm_i915_private *dev_priv) */ gen7_init_oa_buffer(dev_priv); - idx = srcu_read_lock(&dev_priv->perf.oa.srcu); - stream = srcu_dereference(dev_priv->perf.oa.exclusive_stream, - &dev_priv->perf.oa.srcu); + idx = srcu_read_lock(&engine->srcu); + stream = srcu_dereference(engine->exclusive_stream, + &engine->srcu); if (!stream->enabled) { - struct i915_gem_context *ctx = - dev_priv->perf.oa.exclusive_stream->ctx; - u32 ctx_id = dev_priv->perf.oa.specific_ctx_id; + struct i915_gem_context *ctx = stream->ctx; + u32 ctx_id = stream->specific_ctx_id; bool periodic = dev_priv->perf.oa.periodic; u32 period_exponent = dev_priv->perf.oa.period_exponent; u32 report_format = dev_priv->perf.oa.oa_buffer.format; @@ -2755,7 +2744,7 @@ static void gen7_oa_enable(struct drm_i915_private *dev_priv) GEN7_OACONTROL_ENABLE); } else I915_WRITE(GEN7_OACONTROL, 0); - srcu_read_unlock(&dev_priv->perf.oa.srcu, idx); + srcu_read_unlock(&engine->srcu, idx); } static void gen8_oa_enable(struct drm_i915_private *dev_priv) @@ -2800,7 +2789,7 
@@ static void i915_perf_stream_enable(struct i915_perf_stream *stream) dev_priv->perf.oa.ops.oa_enable(dev_priv); if (stream->cs_mode || dev_priv->perf.oa.periodic) - hrtimer_start(&dev_priv->perf.oa.poll_check_timer, + hrtimer_start(&dev_priv->perf.poll_check_timer, ns_to_ktime(POLL_PERIOD), HRTIMER_MODE_REL_PINNED); } @@ -2828,7 +2817,7 @@ static void i915_perf_stream_disable(struct i915_perf_stream *stream) struct drm_i915_private *dev_priv = stream->dev_priv; if (stream->cs_mode || dev_priv->perf.oa.periodic) - hrtimer_cancel(&dev_priv->perf.oa.poll_check_timer); + hrtimer_cancel(&dev_priv->perf.poll_check_timer); if (stream->cs_mode) i915_perf_stream_release_samples(stream); @@ -2931,20 +2920,21 @@ static int i915_perf_stream_init(struct i915_perf_stream *stream, return -ENODEV; } + engine = dev_priv->engine[RCS]; /* * To avoid the complexity of having to accurately filter * counter reports and marshal to the appropriate client * we currently only allow exclusive access */ - idx = srcu_read_lock(&dev_priv->perf.oa.srcu); + idx = srcu_read_lock(&engine->srcu); curr_stream = srcu_dereference( - dev_priv->perf.oa.exclusive_stream, - &dev_priv->perf.oa.srcu); + engine->exclusive_stream, + &engine->srcu); if (curr_stream) { DRM_ERROR("Stream already opened\n"); return -EBUSY; } - srcu_read_unlock(&dev_priv->perf.oa.srcu, idx); + srcu_read_unlock(&engine->srcu, idx); stream->engine = engine; @@ -2959,7 +2949,6 @@ static int i915_perf_stream_init(struct i915_perf_stream *stream, return -EINVAL; } - engine = dev_priv->engine[RCS]; stream->using_oa = true; format_size = @@ -3074,16 +3063,17 @@ static int i915_perf_stream_init(struct i915_perf_stream *stream, stream->sample_size += 8; } - idx = srcu_read_lock(&dev_priv->perf.oa.srcu); + engine = dev_priv->engine[props->engine]; + idx = srcu_read_lock(&engine->srcu); curr_stream = srcu_dereference( - dev_priv->perf.oa.exclusive_stream, - &dev_priv->perf.oa.srcu); + engine->exclusive_stream, + &engine->srcu); if 
(curr_stream) { DRM_ERROR("Stream already opened\n"); ret = -EINVAL; goto err_enable; } - srcu_read_unlock(&dev_priv->perf.oa.srcu, idx); + srcu_read_unlock(&engine->srcu, idx); stream->engine = engine; ret = alloc_cs_buffer(stream); @@ -3094,6 +3084,8 @@ static int i915_perf_stream_init(struct i915_perf_stream *stream, } stream->ops = &perf_stream_ops; + init_waitqueue_head(&stream->poll_wq); + stream->pollin = false; /* Lock device for exclusive_stream access late because * enable_metric_set() might lock as well on gen8+. @@ -3101,7 +3093,7 @@ static int i915_perf_stream_init(struct i915_perf_stream *stream, ret = i915_mutex_lock_interruptible(&dev_priv->drm); if (ret) goto err_lock; - rcu_assign_pointer(dev_priv->perf.oa.exclusive_stream, stream); + rcu_assign_pointer(engine->exclusive_stream, stream); mutex_unlock(&dev_priv->drm.struct_mutex); return 0; @@ -3135,7 +3127,7 @@ void i915_oa_init_reg_state(struct intel_engine_cs *engine, if (engine->id != RCS) return; - stream = engine->i915->perf.oa.exclusive_stream; + stream = engine->exclusive_stream; if (stream) gen8_update_reg_state_unlocked(ctx, reg_state, stream->oa_config); } @@ -3250,7 +3242,7 @@ static ssize_t i915_perf_read(struct file *file, * before reporting another POLLIN event. 
*/ if (ret >= 0 || ret == -EAGAIN) - dev_priv->perf.oa.pollin = false; + stream->pollin = false; return ret; } @@ -3260,21 +3252,21 @@ static enum hrtimer_restart poll_check_timer_cb(struct hrtimer *hrtimer) struct i915_perf_stream *stream; struct drm_i915_private *dev_priv = container_of(hrtimer, typeof(*dev_priv), - perf.oa.poll_check_timer); + perf.poll_check_timer); int idx; struct intel_engine_cs *engine; enum intel_engine_id id; for_each_engine(engine, dev_priv, id) { - idx = srcu_read_lock(&dev_priv->perf.oa.srcu); - stream = srcu_dereference(dev_priv->perf.oa.exclusive_stream, - &dev_priv->perf.oa.srcu); + idx = srcu_read_lock(&engine->srcu); + stream = srcu_dereference(engine->exclusive_stream, + &engine->srcu); if (stream && stream->enabled && stream_have_data_unlocked(stream)) { - dev_priv->perf.oa.pollin = true; - wake_up(&dev_priv->perf.oa.poll_wq); + stream->pollin = true; + wake_up(&stream->poll_wq); } - srcu_read_unlock(&dev_priv->perf.oa.srcu, idx); + srcu_read_unlock(&engine->srcu, idx); } hrtimer_forward_now(hrtimer, ns_to_ktime(POLL_PERIOD)); @@ -3313,7 +3305,7 @@ static unsigned int i915_perf_poll_locked(struct drm_i915_private *dev_priv, * the hrtimer/oa_poll_check_timer_cb to notify us when there are * samples to read. 
*/ - if (dev_priv->perf.oa.pollin) + if (stream->pollin) events |= POLLIN; return events; @@ -4373,20 +4365,24 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, void i915_perf_streams_mark_idle(struct drm_i915_private *dev_priv) { struct i915_perf_stream *stream; + struct intel_engine_cs *engine; + enum intel_engine_id id; int idx; - idx = srcu_read_lock(&dev_priv->perf.oa.srcu); - stream = srcu_dereference(dev_priv->perf.oa.exclusive_stream, - &dev_priv->perf.oa.srcu); - if (stream && stream->enabled && stream->cs_mode) { - struct reservation_object *resv = - stream->cs_buffer.vma->resv; - - reservation_object_lock(resv, NULL); - reservation_object_add_excl_fence(resv, NULL); - reservation_object_unlock(resv); + for_each_engine(engine, dev_priv, id) { + idx = srcu_read_lock(&engine->srcu); + stream = srcu_dereference(engine->exclusive_stream, + &engine->srcu); + if (stream && stream->enabled && stream->cs_mode) { + struct reservation_object *resv = + stream->cs_buffer.vma->resv; + + reservation_object_lock(resv, NULL); + reservation_object_add_excl_fence(resv, NULL); + reservation_object_unlock(resv); + } + srcu_read_unlock(&engine->srcu, idx); } - srcu_read_unlock(&dev_priv->perf.oa.srcu, idx); } /** @@ -4482,19 +4478,13 @@ void i915_perf_init(struct drm_i915_private *dev_priv) } if (dev_priv->perf.oa.timestamp_frequency) { - hrtimer_init(&dev_priv->perf.oa.poll_check_timer, + hrtimer_init(&dev_priv->perf.poll_check_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - dev_priv->perf.oa.poll_check_timer.function = - poll_check_timer_cb; - init_waitqueue_head(&dev_priv->perf.oa.poll_wq); + dev_priv->perf.poll_check_timer.function = poll_check_timer_cb; mutex_init(&dev_priv->perf.lock); spin_lock_init(&dev_priv->perf.oa.oa_buffer.ptr_lock); - /* Perf stream related initialization for Engine */ - rcu_assign_pointer(dev_priv->perf.oa.exclusive_stream, NULL); - init_srcu_struct(&dev_priv->perf.oa.srcu); - oa_sample_rate_hard_limit = 
 dev_priv->perf.oa.timestamp_frequency / 2; dev_priv->perf.sysctl_header = register_sysctl_table(dev_root); @@ -4532,7 +4522,5 @@ void i915_perf_fini(struct drm_i915_private *dev_priv) memset(&dev_priv->perf.oa.ops, 0, sizeof(dev_priv->perf.oa.ops)); - cleanup_srcu_struct(&dev_priv->perf.oa.srcu); - dev_priv->perf.initialized = false; } diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index a6ac9d0..20df519 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -317,6 +317,10 @@ int intel_engines_init(struct drm_i915_private *dev_priv) goto cleanup; GEM_BUG_ON(!engine->submit_request); + + /* Perf stream related initialization for Engine */ + rcu_assign_pointer(engine->exclusive_stream, NULL); + init_srcu_struct(&engine->srcu); } return 0; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index cdf084e..501c544 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1622,6 +1622,8 @@ void intel_engine_cleanup(struct intel_engine_cs *engine) intel_engine_cleanup_common(engine); + cleanup_srcu_struct(&engine->srcu); + dev_priv->engine[engine->id] = NULL; kfree(engine); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 02d8974..2fcfaf4 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -441,6 +441,14 @@ struct intel_engine_cs { * certain bits to encode the command length in the header). */ u32 (*get_cmd_length_mask)(u32 cmd_header); + + /* + * For RCS, this stream can use the OA unit. All accesses are guarded by + * SRCU, including those in syscalls, as we can't hold + * dev_priv->drm.struct_mutex in the execbuf path. 
+ */ + struct i915_perf_stream __rcu *exclusive_stream; + struct srcu_struct srcu; }; static inline unsigned int -- 1.9.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx