This adds the ability for userspace to request that the kernel track and
record SSEU configuration changes. These changes are inserted into the
perf stream so that userspace can interpret the OA reports using the
configuration applied at the time the OA reports were generated.

v2: Handle timestamps wrapping around 32bits (Lionel)

v3: Use void * instead of u8 * for buffer virtual address (Chris)
    Use i915_vma_unpin_and_release (Chris)
    Fix a deadlock when SSEU buffer overflows (Lionel)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_drv.h         |  45 ++++
 drivers/gpu/drm/i915/i915_gem_context.c |   3 +
 drivers/gpu/drm/i915/i915_gem_context.h |  21 ++
 drivers/gpu/drm/i915/i915_perf.c        | 455 ++++++++++++++++++++++++++++++--
 drivers/gpu/drm/i915/intel_lrc.c        |   7 +-
 include/uapi/drm/i915_drm.h             |  35 +++
 6 files changed, 536 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7ef1908e3a98..82f4e52718eb 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1970,6 +1970,12 @@ struct i915_perf_stream {
 	bool noa_restore;
 
 	/**
+	 * @has_sseu: Whether the stream emits SSEU configuration change
+	 * reports.
+	 */
+	bool has_sseu;
+
+	/**
 	 * @enabled: Whether the stream is currently enabled, considering
 	 * whether the stream was opened in a disabled state and based
 	 * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls.
@@ -2508,6 +2514,44 @@ struct drm_i915_private {
 			 */
 			atomic_t noa_restore;
 		} oa;
+
+		struct {
+			/**
+			 * Buffer containing change reports of the SSEU
+			 * configuration.
+			 */
+			struct i915_vma *vma;
+			void *vaddr;
+
+			/**
+			 * The scheduler writes to the head and the perf
+			 * driver reads from the tail.
+			 */
+			u32 head;
+			u32 tail;
+
+			/**
+			 * Whether the SSEU buffer is enabled.
+			 */
+			bool enabled;
+
+			/**
+			 * Whether the buffer has overflown.
+			 */
+			bool overflow;
+
+			/**
+			 * Keeps track of how many times the OA unit has been
+			 * enabled. This number is used to discard stale
+			 * @perf_sseu in @i915_gem_context.
+			 */
+			atomic64_t enable_no;
+
+			/**
+			 * Lock protecting writes & tail pointer updates on this buffer.
+			 */
+			spinlock_t ptr_lock;
+		} sseu_buffer;
 	} perf;
 
 	/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
@@ -3558,6 +3602,7 @@ void i915_oa_init_reg_state(struct intel_engine_cs *engine,
 			    struct i915_gem_context *ctx,
 			    uint32_t *reg_state);
 int i915_oa_emit_noa_config_locked(struct drm_i915_gem_request *req);
+void i915_perf_emit_sseu_config(struct drm_i915_gem_request *req);
 
 /* i915_gem_evict.c */
 int __must_check i915_gem_evict_something(struct i915_address_space *vm,
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 60d0bbf0ae2b..368bb527a75a 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -510,6 +510,9 @@ mi_set_context(struct drm_i915_gem_request *req, u32 flags)
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
+	/* Notify perf of a possible change in SSEU configuration. */
+	i915_perf_emit_sseu_config(req);
+
 	/* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
 	if (INTEL_GEN(dev_priv) >= 7) {
 		*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index e8247e2f6011..35ebdde1cd16 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -194,6 +194,27 @@ struct i915_gem_context {
 
 	/** remap_slice: Bitmask of cache lines that need remapping */
 	u8 remap_slice;
+
+	/**
+	 * @perf_sseu: last SSEU configuration notified to userspace
+	 *
+	 * SSEU configuration changes are notified to userspace through the
+	 * perf infrastructure. We keep track here of the last notified
+	 * configuration for a given context since the configuration can
+	 * change per engine.
+	 */
+	struct sseu_dev_info perf_sseu;
+
+	/**
+	 * @perf_enable_no: last perf enable identifier
+	 *
+	 * Userspace can enable/disable the perf infrastructure whenever it
+	 * wants without reconfiguring the OA unit. This number is updated at
+	 * the same time as @perf_sseu by copying
+	 * dev_priv->perf.sseu_buffer.enable_no which changes every time the
+	 * user enables the OA unit.
+	 */
+	u64 perf_enable_no;
 };
 
 static inline bool i915_gem_context_is_closed(const struct i915_gem_context *ctx)
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 3f49ce69641b..87ebc450c456 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -212,6 +212,15 @@
  */
 #define OA_BUFFER_SIZE		SZ_16M
 
+/* Dimension the SSEU buffer based on the size of the OA buffer to ensure we
+ * don't run out of space before the OA unit notifies us that new data is
+ * available.
+ */
+#define SSEU_BUFFER_ITEM_SIZE	sizeof(struct perf_sseu_change)
+#define SSEU_BUFFER_SIZE	((OA_BUFFER_SIZE * SSEU_BUFFER_ITEM_SIZE) / \
+				 256 /* Maximum report size */)
+#define SSEU_BUFFER_NB_ITEM	(SSEU_BUFFER_SIZE / SSEU_BUFFER_ITEM_SIZE)
+
 #define OA_TAKEN(tail, head)	((tail - head) & (OA_BUFFER_SIZE - 1))
 
 /**
@@ -352,6 +361,8 @@ struct perf_open_properties {
 
 	bool noa_restore;
 
+	bool sseu_enable;
+
 	/* OA sampling state */
 	int metrics_set;
 	int oa_format;
@@ -359,6 +370,19 @@ struct perf_open_properties {
 	int oa_period_exponent;
 };
 
+struct perf_sseu_change {
+	u32 timestamp;
+	struct drm_i915_perf_sseu_change change;
+};
+
+static void i915_oa_stream_disable(struct i915_perf_stream *stream);
+static void i915_oa_stream_enable(struct i915_perf_stream *stream);
+
+static bool timestamp_passed(u32 t0, u32 t1)
+{
+	return (s32)(t0 - t1) >= 0;
+}
+
 static u32 gen8_oa_hw_tail_read(struct drm_i915_private *dev_priv)
 {
 	return I915_READ(GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK;
@@ -571,6 +595,87 @@ static int append_oa_sample(struct i915_perf_stream *stream,
 	return 0;
 }
 
+static int append_sseu_change(struct i915_perf_stream *stream,
+			      char __user *buf,
+			      size_t count,
+			      size_t *offset,
+			      const struct perf_sseu_change *sseu_report)
+{
+	struct drm_i915_perf_record_header header;
+
+	header.type = DRM_I915_PERF_RECORD_SSEU_CHANGE;
+	header.pad = 0;
+	header.size = sizeof(struct drm_i915_perf_record_header) +
+		sizeof(struct drm_i915_perf_sseu_change);
+
+	if ((count - *offset) < header.size)
+		return -ENOSPC;
+
+	buf += *offset;
+	if (copy_to_user(buf, &header, sizeof(header)))
+		return -EFAULT;
+	buf += sizeof(header);
+
+	if (copy_to_user(buf, &sseu_report->change,
+			 sizeof(struct drm_i915_perf_sseu_change)))
+		return -EFAULT;
+
+	(*offset) += header.size;
+
+	return 0;
+}
+
+static struct perf_sseu_change *
+sseu_buffer_peek(struct drm_i915_private *dev_priv, u32 head, u32 tail)
+{
+	if (head == tail)
+		return NULL;
+
+	return dev_priv->perf.sseu_buffer.vaddr + tail * SSEU_BUFFER_ITEM_SIZE;
+}
+
+static struct perf_sseu_change *
+sseu_buffer_advance(struct drm_i915_private *dev_priv, u32 head, u32 *tail)
+{
+	*tail = (*tail + 1) % SSEU_BUFFER_NB_ITEM;
+
+	return sseu_buffer_peek(dev_priv, head, *tail);
+}
+
+static void
+sseu_buffer_read_pointers(struct i915_perf_stream *stream, u32 *head, u32 *tail)
+{
+	struct drm_i915_private *dev_priv = stream->dev_priv;
+
+	if (!stream->has_sseu) {
+		*head = 0;
+		*tail = 0;
+		return;
+	}
+
+	spin_lock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
+
+	*head = dev_priv->perf.sseu_buffer.head;
+	*tail = dev_priv->perf.sseu_buffer.tail;
+
+	spin_unlock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
+}
+
+static void
+sseu_buffer_write_tail_pointer(struct i915_perf_stream *stream, u32 tail)
+{
+	struct drm_i915_private *dev_priv = stream->dev_priv;
+
+	if (!stream->has_sseu)
+		return;
+
+	spin_lock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
+
+	dev_priv->perf.sseu_buffer.tail = tail;
+
+	spin_unlock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
+}
+
 /**
  * Copies all buffered OA reports into userspace read() buffer.
  * @stream: An i915-perf stream opened for OA metrics
@@ -606,6 +711,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
 	unsigned int aged_tail_idx;
 	u32 head, tail;
 	u32 taken;
+	struct perf_sseu_change *sseu_report;
+	u32 sseu_head, sseu_tail;
 	int ret = 0;
 
 	if (WARN_ON(!stream->enabled))
@@ -646,6 +753,9 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
 			  head, tail))
 		return -EIO;
 
+	/* Extract first SSEU report. */
+	sseu_buffer_read_pointers(stream, &sseu_head, &sseu_tail);
+	sseu_report = sseu_buffer_peek(dev_priv, sseu_head, sseu_tail);
 
 	for (/* none */;
 	     (taken = OA_TAKEN(tail, head));
@@ -686,6 +796,26 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
 			continue;
 		}
 
+		while (sseu_report &&
+		       !timestamp_passed(sseu_report->timestamp, report32[1])) {
+			/*
+			 * While filtering for a single context we avoid
+			 * leaking the IDs of other contexts.
+			 */
+			if (stream->ctx &&
+			    stream->ctx->hw_id != sseu_report->change.hw_id) {
+				sseu_report->change.hw_id = INVALID_CTX_ID;
+			}
+
+			ret = append_sseu_change(stream, buf, count,
+						 offset, sseu_report);
+			if (ret)
+				break;
+
+			sseu_report = sseu_buffer_advance(dev_priv, sseu_head,
+							  &sseu_tail);
+		}
+
 		/*
 		 * XXX: Just keep the lower 21 bits for now since I'm not
 		 * entirely sure if the HW touches any of the higher bits in
@@ -783,6 +913,9 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
 		spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock,
 				       flags);
 	}
 
+	/* Update tail pointer of the sseu buffer. */
+	sseu_buffer_write_tail_pointer(stream, sseu_tail);
+
 	return ret;
 }
 
@@ -812,12 +945,29 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
 			size_t *offset)
 {
 	struct drm_i915_private *dev_priv = stream->dev_priv;
+	bool overflow = false;
 	u32 oastatus;
 	int ret;
 
 	if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr))
 		return -EIO;
 
+	spin_lock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
+	overflow = dev_priv->perf.sseu_buffer.overflow;
+	spin_unlock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
+
+	if (overflow) {
+		DRM_DEBUG("SSEU buffer overflow\n");
+
+		ret = append_oa_status(stream, buf, count, offset,
+				       DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
+		if (ret)
+			return ret;
+
+		i915_oa_stream_disable(stream);
+		i915_oa_stream_enable(stream);
+	}
+
 	oastatus = I915_READ(GEN8_OASTATUS);
 
 	/*
@@ -835,16 +985,18 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
 	 * that something has gone quite badly wrong.
 	 */
 	if (oastatus & GEN8_OASTATUS_OABUFFER_OVERFLOW) {
-		ret = append_oa_status(stream, buf, count, offset,
-				       DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
-		if (ret)
-			return ret;
-
 		DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
 			  dev_priv->perf.oa.period_exponent);
 
-		dev_priv->perf.oa.ops.oa_disable(dev_priv);
-		dev_priv->perf.oa.ops.oa_enable(dev_priv);
+		if (!overflow) {
+			ret = append_oa_status(stream, buf, count, offset,
+					       DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
+			if (ret)
+				return ret;
+
+			i915_oa_stream_disable(stream);
+			i915_oa_stream_enable(stream);
+		}
 
 		/*
 		 * Note: .oa_enable() is expected to re-init the oabuffer and
@@ -854,10 +1006,12 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
 	}
 
 	if (oastatus & GEN8_OASTATUS_REPORT_LOST) {
-		ret = append_oa_status(stream, buf, count, offset,
-				       DRM_I915_PERF_RECORD_OA_REPORT_LOST);
-		if (ret)
-			return ret;
+		if (!overflow) {
+			ret = append_oa_status(stream, buf, count, offset,
+					       DRM_I915_PERF_RECORD_OA_REPORT_LOST);
+			if (ret)
+				return ret;
+		}
 
 		I915_WRITE(GEN8_OASTATUS,
 			   oastatus & ~GEN8_OASTATUS_REPORT_LOST);
@@ -900,6 +1054,8 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
 	unsigned int aged_tail_idx;
 	u32 head, tail;
 	u32 taken;
+	struct perf_sseu_change *sseu_report;
+	u32 sseu_head, sseu_tail;
 	int ret = 0;
 
 	if (WARN_ON(!stream->enabled))
@@ -937,6 +1093,9 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
 			  head, tail))
 		return -EIO;
 
+	/* Extract first SSEU report. */
+	sseu_buffer_read_pointers(stream, &sseu_head, &sseu_tail);
+	sseu_report = sseu_buffer_peek(dev_priv, sseu_head, sseu_tail);
 
 	for (/* none */;
 	     (taken = OA_TAKEN(tail, head));
@@ -969,6 +1128,27 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
 			continue;
 		}
 
+		/* Emit any pending SSEU change reports. */
+		while (sseu_report &&
+		       !timestamp_passed(sseu_report->timestamp, report32[1])) {
+			/*
+			 * While filtering for a single context we avoid
+			 * leaking the IDs of other contexts.
+			 */
+			if (stream->ctx &&
+			    stream->ctx->hw_id != sseu_report->change.hw_id) {
+				sseu_report->change.hw_id = INVALID_CTX_ID;
+			}
+
+			ret = append_sseu_change(stream, buf, count,
+						 offset, sseu_report);
+			if (ret)
+				break;
+
+			sseu_report = sseu_buffer_advance(dev_priv, sseu_head,
+							  &sseu_tail);
+		}
+
 		ret = append_oa_sample(stream, buf, count, offset, report);
 		if (ret)
 			break;
@@ -998,6 +1178,9 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
 		spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock,
 				       flags);
 	}
 
+	/* Update tail pointer of the sseu buffer. */
+	sseu_buffer_write_tail_pointer(stream, sseu_tail);
+
 	return ret;
 }
 
@@ -1023,12 +1206,29 @@ static int gen7_oa_read(struct i915_perf_stream *stream,
 			size_t *offset)
 {
 	struct drm_i915_private *dev_priv = stream->dev_priv;
+	bool overflow = false;
 	u32 oastatus1;
 	int ret;
 
 	if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr))
 		return -EIO;
 
+	spin_lock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
+	overflow = dev_priv->perf.sseu_buffer.overflow;
+	spin_unlock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
+
+	if (overflow) {
+		DRM_DEBUG("SSEU buffer overflow\n");
+
+		ret = append_oa_status(stream, buf, count, offset,
+				       DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
+		if (ret)
+			return ret;
+
+		i915_oa_stream_disable(stream);
+		i915_oa_stream_enable(stream);
+	}
+
 	oastatus1 = I915_READ(GEN7_OASTATUS1);
 
 	/* XXX: On Haswell we don't have a safe way to clear oastatus1
@@ -1059,25 +1259,30 @@ static int gen7_oa_read(struct i915_perf_stream *stream,
 	 * now.
 	 */
 	if (unlikely(oastatus1 & GEN7_OASTATUS1_OABUFFER_OVERFLOW)) {
-		ret = append_oa_status(stream, buf, count, offset,
-				       DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
-		if (ret)
-			return ret;
-
 		DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
 			  dev_priv->perf.oa.period_exponent);
 
-		dev_priv->perf.oa.ops.oa_disable(dev_priv);
-		dev_priv->perf.oa.ops.oa_enable(dev_priv);
+		if (!overflow) {
+			ret = append_oa_status(stream, buf, count, offset,
+					       DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
+			if (ret)
+				return ret;
+
+			i915_oa_stream_disable(stream);
+			i915_oa_stream_enable(stream);
+		}
 
 		oastatus1 = I915_READ(GEN7_OASTATUS1);
 	}
 
 	if (unlikely(oastatus1 & GEN7_OASTATUS1_REPORT_LOST)) {
-		ret = append_oa_status(stream, buf, count, offset,
-				       DRM_I915_PERF_RECORD_OA_REPORT_LOST);
-		if (ret)
-			return ret;
+		if (!overflow) {
+			ret = append_oa_status(stream, buf, count, offset,
+					       DRM_I915_PERF_RECORD_OA_REPORT_LOST);
+			if (ret)
+				return ret;
+		}
+
 		dev_priv->perf.oa.gen7_latched_oastatus1 |=
 			GEN7_OASTATUS1_REPORT_LOST;
 	}
@@ -1227,18 +1432,21 @@ static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
 }
 
 static void
-free_oa_buffer(struct drm_i915_private *i915)
+free_sseu_buffer(struct drm_i915_private *i915)
 {
-	mutex_lock(&i915->drm.struct_mutex);
+	i915_gem_object_unpin_map(i915->perf.sseu_buffer.vma->obj);
+	i915_vma_unpin_and_release(&i915->perf.sseu_buffer.vma);
+	i915->perf.sseu_buffer.vaddr = NULL;
+}
+
+static void
+free_oa_buffer(struct drm_i915_private *i915)
+{
 	i915_gem_object_unpin_map(i915->perf.oa.oa_buffer.vma->obj);
-	i915_vma_unpin(i915->perf.oa.oa_buffer.vma);
-	i915_gem_object_put(i915->perf.oa.oa_buffer.vma->obj);
+	i915_vma_unpin_and_release(&i915->perf.oa.oa_buffer.vma);
 
-	i915->perf.oa.oa_buffer.vma = NULL;
 	i915->perf.oa.oa_buffer.vaddr = NULL;
-
-	mutex_unlock(&i915->drm.struct_mutex);
 }
 
 static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
@@ -1255,8 +1463,15 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
 
 	dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
 
+	mutex_lock(&dev_priv->drm.struct_mutex);
+
+	if (stream->has_sseu)
+		free_sseu_buffer(dev_priv);
+
 	free_oa_buffer(dev_priv);
 
+	mutex_unlock(&dev_priv->drm.struct_mutex);
+
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 	intel_runtime_pm_put(dev_priv);
 
@@ -1269,6 +1484,97 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
 	}
 }
 
+static void init_sseu_buffer(struct drm_i915_private *dev_priv,
+			     bool need_lock)
+{
+	/* Clean up the auxiliary buffer. */
+	memset(dev_priv->perf.sseu_buffer.vaddr, 0, SSEU_BUFFER_SIZE);
+
+	atomic64_inc(&dev_priv->perf.sseu_buffer.enable_no);
+
+	if (need_lock)
+		spin_lock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
+
+	/* Initialize head & tail pointers. */
+	dev_priv->perf.sseu_buffer.head = 0;
+	dev_priv->perf.sseu_buffer.tail = 0;
+	dev_priv->perf.sseu_buffer.overflow = false;
+
+	if (need_lock)
+		spin_unlock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
+}
+
+static int alloc_sseu_buffer(struct drm_i915_private *dev_priv)
+{
+	struct drm_i915_gem_object *bo;
+	struct i915_vma *vma;
+	int ret = 0;
+
+	bo = i915_gem_object_create(dev_priv, SSEU_BUFFER_SIZE);
+	if (IS_ERR(bo)) {
+		DRM_ERROR("Failed to allocate SSEU buffer\n");
+		ret = PTR_ERR(bo);
+		goto out;
+	}
+
+	vma = i915_gem_object_ggtt_pin(bo, NULL, 0, SSEU_BUFFER_SIZE, 0);
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		goto err_unref;
+	}
+	dev_priv->perf.sseu_buffer.vma = vma;
+
+	dev_priv->perf.sseu_buffer.vaddr =
+		i915_gem_object_pin_map(bo, I915_MAP_WB);
+	if (IS_ERR(dev_priv->perf.sseu_buffer.vaddr)) {
+		ret = PTR_ERR(dev_priv->perf.sseu_buffer.vaddr);
+		goto err_unpin;
+	}
+
+	atomic64_set(&dev_priv->perf.sseu_buffer.enable_no, 0);
+
+	init_sseu_buffer(dev_priv, true);
+
+	DRM_DEBUG_DRIVER("OA SSEU Buffer initialized, gtt offset = 0x%x, vaddr = %p\n",
+			 i915_ggtt_offset(dev_priv->perf.sseu_buffer.vma),
+			 dev_priv->perf.sseu_buffer.vaddr);
+
+	goto out;
+
+err_unpin:
+	__i915_vma_unpin(vma);
+
+err_unref:
+	i915_gem_object_put(bo);
+
+	dev_priv->perf.sseu_buffer.vaddr = NULL;
+	dev_priv->perf.sseu_buffer.vma = NULL;
+
+out:
+	return ret;
+}
+
+static void sseu_enable(struct drm_i915_private *dev_priv)
+{
+	spin_lock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
+
+	init_sseu_buffer(dev_priv, false);
+
+	dev_priv->perf.sseu_buffer.enabled = true;
+
+	spin_unlock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
+}
+
+static void sseu_disable(struct drm_i915_private *dev_priv)
+{
+	spin_lock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
+
+	dev_priv->perf.sseu_buffer.enabled = false;
+	dev_priv->perf.sseu_buffer.overflow = false;
+
+	spin_unlock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
+}
+
 static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv)
 {
 	u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
@@ -1763,6 +2069,9 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
 		struct intel_context *ce = &ctx->engine[RCS];
 		u32 *regs;
 
+		memset(&ctx->perf_sseu, 0, sizeof(ctx->perf_sseu));
+		ctx->perf_enable_no = 0;
+
 		/* OA settings will be set upon first use */
 		if (!ce->state)
 			continue;
@@ -1927,6 +2236,9 @@ static void i915_oa_stream_enable(struct i915_perf_stream *stream)
 {
 	struct drm_i915_private *dev_priv = stream->dev_priv;
 
+	if (stream->has_sseu)
+		sseu_enable(dev_priv);
+
 	dev_priv->perf.oa.ops.oa_enable(dev_priv);
 
 	if (dev_priv->perf.oa.periodic)
@@ -1957,6 +2269,9 @@ static void i915_oa_stream_disable(struct i915_perf_stream *stream)
 {
 	struct drm_i915_private *dev_priv = stream->dev_priv;
 
+	if (stream->has_sseu)
+		sseu_disable(dev_priv);
+
 	dev_priv->perf.oa.ops.oa_disable(dev_priv);
 
 	if (dev_priv->perf.oa.periodic)
@@ -2087,6 +2402,14 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 		atomic_inc(&dev_priv->perf.oa.noa_restore);
 	}
 
+	if (props->sseu_enable) {
+		ret = alloc_sseu_buffer(dev_priv);
+		if (ret)
+			goto err_sseu_buf_alloc;
+
+		stream->has_sseu = true;
+	}
+
 	ret = alloc_oa_buffer(dev_priv);
 	if (ret)
 		goto err_oa_buf_alloc;
@@ -2119,6 +2442,10 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 err_enable:
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 	intel_runtime_pm_put(dev_priv);
 	free_oa_buffer(dev_priv);
 
 err_oa_buf_alloc:
+	if (stream->has_sseu)
+		free_sseu_buffer(dev_priv);
+
+err_sseu_buf_alloc:
@@ -2212,6 +2539,69 @@ int i915_oa_emit_noa_config_locked(struct drm_i915_gem_request *req)
 }
 
 /**
+ * i915_perf_emit_sseu_config - emit the SSEU configuration of a gem request
+ * @req: the request
+ *
+ * If the OA unit is enabled, write the SSEU configuration of the
+ * given request into a circular buffer. The buffer is read at the
+ * same time as the OA buffer and its elements are inserted into the
+ * perf stream to notify userspace of SSEU configuration changes.
+ * This is needed to interpret values from the OA reports.
+ */
+void i915_perf_emit_sseu_config(struct drm_i915_gem_request *req)
+{
+	struct drm_i915_private *dev_priv = req->i915;
+	struct perf_sseu_change *report;
+	const struct sseu_dev_info *sseu;
+	u32 head, timestamp;
+	u64 perf_enable_no;
+
+	/* Perf not supported. */
+	if (!dev_priv->perf.initialized)
+		return;
+
+	/* Nothing's changed, no need to signal userspace. */
+	sseu = &req->ctx->engine[req->engine->id].sseu;
+	perf_enable_no = atomic64_read(&dev_priv->perf.sseu_buffer.enable_no);
+	if (perf_enable_no == req->ctx->perf_enable_no &&
+	    memcmp(&req->ctx->perf_sseu, sseu, sizeof(*sseu)) == 0)
+		return;
+
+	memcpy(&req->ctx->perf_sseu, sseu, sizeof(*sseu));
+	req->ctx->perf_enable_no = perf_enable_no;
+
+	/* Read the timestamp outside the spin lock. */
+	timestamp = I915_READ(RING_TIMESTAMP(RENDER_RING_BASE));
+
+	spin_lock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
+
+	if (!dev_priv->perf.sseu_buffer.enabled)
+		goto out;
+
+	head = dev_priv->perf.sseu_buffer.head;
+	dev_priv->perf.sseu_buffer.head =
+		(dev_priv->perf.sseu_buffer.head + 1) % SSEU_BUFFER_NB_ITEM;
+
+	/* This should never happen, unless we've wrongly dimensioned the
+	 * SSEU buffer.
+	 */
+	if (dev_priv->perf.sseu_buffer.head == dev_priv->perf.sseu_buffer.tail) {
+		dev_priv->perf.sseu_buffer.overflow = true;
+		goto out;
+	}
+
+	report = dev_priv->perf.sseu_buffer.vaddr + head * SSEU_BUFFER_ITEM_SIZE;
+	memset(report, 0, sizeof(*report));
+
+	report->timestamp = timestamp;
+	report->change.hw_id = req->ctx->hw_id;
+	report->change.slice_mask = sseu->slice_mask;
+	report->change.subslice_mask = sseu->subslice_mask;
+
+out:
+	spin_unlock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
+}
+
+/**
 * i915_perf_read_locked - &i915_perf_stream_ops->read with error normalisation
 * @stream: An i915 perf stream
 * @file: An i915 perf stream file
@@ -2867,6 +3257,9 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
 		case DRM_I915_PERF_PROP_NOA_RESTORE:
 			props->noa_restore = true;
 			break;
+		case DRM_I915_PERF_PROP_SSEU_CHANGE:
+			props->sseu_enable = true;
+			break;
 		case DRM_I915_PERF_PROP_MAX:
 			MISSING_CASE(id);
 			return -EINVAL;
@@ -3229,6 +3622,10 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
 
 	atomic_set(&dev_priv->perf.oa.noa_restore, 0);
 
+	spin_lock_init(&dev_priv->perf.sseu_buffer.ptr_lock);
+	dev_priv->perf.sseu_buffer.enabled = false;
+	atomic64_set(&dev_priv->perf.sseu_buffer.enable_no, 0);
+
 	oa_sample_rate_hard_limit = dev_priv->perf.oa.timestamp_frequency / 2;
 
 	dev_priv->perf.sysctl_header = register_sysctl_table(dev_root);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index ad37e2b236b0..e2f83a12337b 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -350,8 +350,10 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
 		rq = port_unpack(&port[n], &count);
 		if (rq) {
 			GEM_BUG_ON(count > !n);
-			if (!count++)
+			if (!count++) {
+				i915_perf_emit_sseu_config(rq);
 				execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
+			}
 			port_set(&port[n], port_pack(rq, count));
 			desc = execlists_update_context(rq);
 			GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
@@ -1406,6 +1408,9 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
 		req->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(req->engine);
 	}
 
+	/* Emit NOA config */
+	i915_oa_emit_noa_config_locked(req);
+
 	cs = intel_ring_begin(req, 4);
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 27cc72a85d07..c82f7473e3a0 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -447,6 +447,16 @@ typedef struct drm_i915_setparam {
 	int value;
 } drm_i915_setparam_t;
 
+union drm_i915_gem_param_sseu {
+	struct {
+		__u8 slice_mask;
+		__u8 subslice_mask;
+		__u8 min_eu_per_subslice;
+		__u8 max_eu_per_subslice;
+	} packed;
+	__u64 value;
+};
+
 /* A memory manager for regions of shared memory:
  */
 #define I915_MEM_REGION_AGP 1
@@ -1376,6 +1386,13 @@ enum drm_i915_perf_property_id {
 	 */
 	DRM_I915_PERF_PROP_NOA_RESTORE,
 
+	/**
+	 * Specifying this property will cause the perf driver to insert SSEU
+	 * change reports (see @DRM_I915_PERF_RECORD_SSEU_CHANGE) in the
+	 * stream of reports.
+	 */
+	DRM_I915_PERF_PROP_SSEU_CHANGE,
+
 	DRM_I915_PERF_PROP_MAX /* non-ABI */
 };
 
@@ -1459,9 +1476,27 @@ enum drm_i915_perf_record_type {
 	 */
 	DRM_I915_PERF_RECORD_OA_BUFFER_LOST = 3,
 
+	/**
+	 * A change in the SSEU configuration happened for a particular
+	 * context.
+	 *
+	 * struct {
+	 *     struct drm_i915_perf_record_header header;
+	 *     { struct drm_i915_perf_sseu_change change; } && DRM_I915_PERF_PROP_SSEU_CHANGE
+	 * };
+	 */
+	DRM_I915_PERF_RECORD_SSEU_CHANGE = 4,
+
 	DRM_I915_PERF_RECORD_MAX /* non-ABI */
 };
 
+struct drm_i915_perf_sseu_change {
+	__u32 hw_id;
+	__u16 pad;
+	__u8 slice_mask;
+	__u8 subslice_mask;
+};
+
 #if defined(__cplusplus)
 }
 #endif
-- 
2.11.0
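For reference, a minimal userspace sketch of how a client could open a
stream with the new property and consume the SSEU change records. This is
not part of the patch: the metrics set ID, OA format and exponent below are
placeholders that depend on the platform and on the metrics sets exposed in
sysfs, and error handling is elided.

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/ioctl.h>

	#include <drm/i915_drm.h>

	static int perf_open_with_sseu(int drm_fd)
	{
		/* Properties are (key, value) pairs of u64. */
		uint64_t properties[] = {
			DRM_I915_PERF_PROP_SAMPLE_OA, 1,
			DRM_I915_PERF_PROP_OA_METRICS_SET, 1,	/* placeholder */
			DRM_I915_PERF_PROP_OA_FORMAT,
				I915_OA_FORMAT_A32u40_A4u32_B8_C8,
			DRM_I915_PERF_PROP_OA_EXPONENT, 16,	/* placeholder */
			/* Added by this patch: request SSEU change records. */
			DRM_I915_PERF_PROP_SSEU_CHANGE, 1,
		};
		struct drm_i915_perf_open_param param = {
			.flags = I915_PERF_FLAG_FD_CLOEXEC,
			.num_properties = sizeof(properties) /
					  (2 * sizeof(uint64_t)),
			.properties_ptr = (uintptr_t)properties,
		};

		return ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
	}

	static void drain_records(int stream_fd)
	{
		uint8_t buf[4096];
		ssize_t ret = read(stream_fd, buf, sizeof(buf));
		size_t len = ret > 0 ? (size_t)ret : 0, offset = 0;

		/*
		 * The stream is a sequence of header-prefixed records; an
		 * SSEU change record carries a struct
		 * drm_i915_perf_sseu_change payload right after the header.
		 */
		while (offset + sizeof(struct drm_i915_perf_record_header) <= len) {
			const struct drm_i915_perf_record_header *header =
				(const void *)(buf + offset);

			if (!header->size)
				break;

			if (header->type == DRM_I915_PERF_RECORD_SSEU_CHANGE) {
				struct drm_i915_perf_sseu_change change;

				memcpy(&change, header + 1, sizeof(change));
				printf("hw_id=%u slices=%#x subslices=%#x\n",
				       change.hw_id, change.slice_mask,
				       change.subslice_mask);
			}

			offset += header->size;
		}
	}

Records with hw_id == INVALID_CTX_ID are changes belonging to other
contexts when filtering on a single context, as implemented in
gen7/gen8_append_oa_reports() above.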