On 5 June 2017 at 15:48, Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx> wrote: > This adds the ability for userspace to request that the kernel track & > record sseu configuration changes. These changes are inserted into the > perf stream so that userspace can interpret the OA reports using the > configuration applied at the time the OA reports where generated. > > v2: Handle timestamps wrapping around 32bits (Lionel) > > v3: Use void * instead of u8 * for buffer virtual address (Chris) > Use i915_vam_unpin_and_release (Chris) > Fix a deadlock when SSEU buffer overflows (Lionel) > > Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_drv.h | 45 ++++ > drivers/gpu/drm/i915/i915_gem_context.c | 3 + > drivers/gpu/drm/i915/i915_gem_context.h | 21 ++ > drivers/gpu/drm/i915/i915_perf.c | 454 ++++++++++++++++++++++++++++++-- > drivers/gpu/drm/i915/intel_lrc.c | 7 +- > include/uapi/drm/i915_drm.h | 35 +++ > 6 files changed, 535 insertions(+), 30 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 7ef1908e3a98..82f4e52718eb 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1970,6 +1970,12 @@ struct i915_perf_stream { > bool noa_restore; > > /** > + * @has_sseu: Whether the stream emits SSEU configuration changes > + * reports. > + */ > + bool has_sseu; > + > + /** > * @enabled: Whether the stream is currently enabled, considering > * whether the stream was opened in a disabled state and based > * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls. > @@ -2508,6 +2514,44 @@ struct drm_i915_private { > */ > atomic_t noa_restore; > } oa; > + > + struct { > + /** > + * Buffer containing change reports of the SSEU > + * configuration. > + */ > + struct i915_vma *vma; > + void *vaddr; > + > + /** > + * Scheduler write to the head, and the perf driver > + * reads from tail. > + */ > + u32 head; > + u32 tail; > + > + /** > + * Is the sseu buffer enabled. > + */ > + bool enabled; > + > + /** > + * Whether the buffer overflown. > + */ > + bool overflow; > + > + /** > + * Keeps track of how many times the OA unit has been > + * enabled. This number is used to discard stale > + * @perf_sseu in @i915_gem_context. > + */ > + atomic64_t enable_no; > + > + /** > + * Lock writes & tail pointer updates on this buffer. > + */ > + spinlock_t ptr_lock; > + } sseu_buffer; > } perf; > > /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */ > @@ -3558,6 +3602,7 @@ void i915_oa_init_reg_state(struct intel_engine_cs *engine, > struct i915_gem_context *ctx, > uint32_t *reg_state); > int i915_oa_emit_noa_config_locked(struct drm_i915_gem_request *req); > +void i915_perf_emit_sseu_config(struct drm_i915_gem_request *req); > > /* i915_gem_evict.c */ > int __must_check i915_gem_evict_something(struct i915_address_space *vm, > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c > index 60d0bbf0ae2b..368bb527a75a 100644 > --- a/drivers/gpu/drm/i915/i915_gem_context.c > +++ b/drivers/gpu/drm/i915/i915_gem_context.c > @@ -510,6 +510,9 @@ mi_set_context(struct drm_i915_gem_request *req, u32 flags) > if (IS_ERR(cs)) > return PTR_ERR(cs); > > + /* Notify perf of possible change in SSEU configuration. */ > + i915_perf_emit_sseu_config(req); > + > /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */ > if (INTEL_GEN(dev_priv) >= 7) { > *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; > diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h > index e8247e2f6011..35ebdde1cd16 100644 > --- a/drivers/gpu/drm/i915/i915_gem_context.h > +++ b/drivers/gpu/drm/i915/i915_gem_context.h > @@ -194,6 +194,27 @@ struct i915_gem_context { > > /** remap_slice: Bitmask of cache lines that need remapping */ > u8 remap_slice; > + > + /** > + * @perf_sseu: last SSEU configuration notified to userspace > + * > + * SSEU configuration changes are notified to userspace through the > + * perf infrastructure. We keep track here of the last notified > + * configuration for a given context since configuration can change > + * per engine. > + */ > + struct sseu_dev_info perf_sseu; > + > + /** > + * @perf_enable_no: last perf enable identifier > + * > + * Userspace can enable/disable the perf infrastructure whenever it > + * wants without reconfiguring the OA unit. This number is updated at > + * the same time as @perf_sseu by copying > + * dev_priv->perf.oa.oa_sseu.enable_no which changes every time the > + * user enables the OA unit. > + */ > + u64 perf_enable_no; > }; > > static inline bool i915_gem_context_is_closed(const struct i915_gem_context *ctx) > diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c > index 3f49ce69641b..87ebc450c456 100644 > --- a/drivers/gpu/drm/i915/i915_perf.c > +++ b/drivers/gpu/drm/i915/i915_perf.c > @@ -212,6 +212,15 @@ > */ > #define OA_BUFFER_SIZE SZ_16M > > +/* Dimension the SSEU buffer based on the size of the OA buffer to ensure we > + * don't run out of space before the OA unit notifies us new data is > + * available. > + */ > +#define SSEU_BUFFER_ITEM_SIZE sizeof(struct perf_sseu_change) > +#define SSEU_BUFFER_SIZE ((OA_BUFFER_SIZE * SSEU_BUFFER_ITEM_SIZE) / \ > + 256 /* Maximum report size */) > +#define SSEU_BUFFER_NB_ITEM (SSEU_BUFFER_SIZE / SSEU_BUFFER_ITEM_SIZE) > + > #define OA_TAKEN(tail, head) ((tail - head) & (OA_BUFFER_SIZE - 1)) > > /** > @@ -352,6 +361,8 @@ struct perf_open_properties { > > bool noa_restore; > > + bool sseu_enable; > + > /* OA sampling state */ > int metrics_set; > int oa_format; > @@ -359,6 +370,19 @@ struct perf_open_properties { > int oa_period_exponent; > }; > > +struct perf_sseu_change { > + u32 timestamp; > + struct drm_i915_perf_sseu_change change; > +}; > + > +static void i915_oa_stream_disable(struct i915_perf_stream *stream); > +static void i915_oa_stream_enable(struct i915_perf_stream *stream); > + > +static bool timestamp_passed(u32 t0, u32 t1) > +{ > + return (s32)(t0 - t1) >= 0; Formatting seems off, also timestamp_passed_or_equal? > +} > + > static u32 gen8_oa_hw_tail_read(struct drm_i915_private *dev_priv) > { > return I915_READ(GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK; > @@ -571,6 +595,87 @@ static int append_oa_sample(struct i915_perf_stream *stream, > return 0; > } > > +static int append_sseu_change(struct i915_perf_stream *stream, > + char __user *buf, > + size_t count, > + size_t *offset, > + const struct perf_sseu_change *sseu_report) > +{ > + struct drm_i915_perf_record_header header; > + > + header.type = DRM_I915_PERF_RECORD_SSEU_CHANGE; > + header.pad = 0; > + header.size = sizeof(struct drm_i915_perf_record_header) + > + sizeof(struct drm_i915_perf_sseu_change); > + > + if ((count - *offset) < header.size) > + return -ENOSPC; > + > + buf += *offset; > + if (copy_to_user(buf, &header, sizeof(header))) > + return -EFAULT; > + buf += sizeof(header); > + > + if (copy_to_user(buf, &sseu_report->change, > + sizeof(struct drm_i915_perf_sseu_change))) > + return -EFAULT; > + > + (*offset) += header.size; > + > + return 0; > +} > + > +static struct perf_sseu_change * > +sseu_buffer_peek(struct drm_i915_private *dev_priv, u32 head, u32 tail) > +{ > + if (head == tail) > + return NULL; > + > + return dev_priv->perf.sseu_buffer.vaddr + tail * SSEU_BUFFER_ITEM_SIZE; > +} > + > +static struct perf_sseu_change * > +sseu_buffer_advance(struct drm_i915_private *dev_priv, u32 head, u32 *tail) > +{ > + *tail = (*tail + 1) % SSEU_BUFFER_NB_ITEM; > + > + return sseu_buffer_peek(dev_priv, head, *tail); > +} > + > +static void > +sseu_buffer_read_pointers(struct i915_perf_stream *stream, u32 *head, u32 *tail) > +{ > + struct drm_i915_private *dev_priv = stream->dev_priv; > + > + if (!stream->has_sseu) { > + *head = 0; > + *tail = 0; > + return; > + } > + > + spin_lock_irq(&dev_priv->perf.sseu_buffer.ptr_lock); > + > + *head = dev_priv->perf.sseu_buffer.head; > + *tail = dev_priv->perf.sseu_buffer.tail; > + > + spin_unlock_irq(&dev_priv->perf.sseu_buffer.ptr_lock); > +} > + > +static void > +sseu_buffer_write_tail_pointer(struct i915_perf_stream *stream, u32 tail) > +{ > + struct drm_i915_private *dev_priv = stream->dev_priv; > + > + if (!stream->has_sseu) > + return; > + > + spin_lock_irq(&dev_priv->perf.sseu_buffer.ptr_lock); > + > + dev_priv->perf.sseu_buffer.tail = tail; > + > + spin_unlock_irq(&dev_priv->perf.sseu_buffer.ptr_lock); > +} > + > /** > * Copies all buffered OA reports into userspace read() buffer. > * @stream: An i915-perf stream opened for OA metrics > @@ -606,6 +711,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, > unsigned int aged_tail_idx; > u32 head, tail; > u32 taken; > + struct perf_sseu_change *sseu_report; > + u32 sseu_head, sseu_tail; > int ret = 0; > > if (WARN_ON(!stream->enabled)) > @@ -646,6 +753,9 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, > head, tail)) > return -EIO; > > + /* Extract first SSEU report. */ > + sseu_buffer_read_pointers(stream, &sseu_head, &sseu_tail); > + sseu_report = sseu_buffer_peek(dev_priv, sseu_head, sseu_tail); > > for (/* none */; > (taken = OA_TAKEN(tail, head)); > @@ -686,6 +796,26 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, > continue; > } > > + while (sseu_report && > + !timestamp_passed(sseu_report->timestamp, report32[1])) { > + /* > + * While filtering for a single context we avoid > + * leaking the IDs of other contexts. > + */ > + if (stream->ctx && > + stream->ctx->hw_id != sseu_report->change.hw_id) { > + sseu_report->change.hw_id = INVALID_CTX_ID; > + } > + > + ret = append_sseu_change(stream, buf, count, > + offset, sseu_report); > + if (ret) > + break; > + > + sseu_report = sseu_buffer_advance(dev_priv, sseu_head, > + &sseu_tail); > + } > + > /* > * XXX: Just keep the lower 21 bits for now since I'm not > * entirely sure if the HW touches any of the higher bits in > @@ -783,6 +913,9 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, > spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); > } > > + /* Update tail pointer of the sseu buffer. */ > + sseu_buffer_write_tail_pointer(stream, sseu_tail); > + > return ret; > } > > @@ -812,12 +945,29 @@ static int gen8_oa_read(struct i915_perf_stream *stream, > size_t *offset) > { > struct drm_i915_private *dev_priv = stream->dev_priv; > + bool overflow = false; > u32 oastatus; > int ret; > > if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr)) > return -EIO; > > + spin_lock_irq(&dev_priv->perf.sseu_buffer.ptr_lock); > + overflow = dev_priv->perf.sseu_buffer.overflow; > + spin_unlock_irq(&dev_priv->perf.sseu_buffer.ptr_lock); > + > + if (overflow) { > + DRM_DEBUG("SSEU buffer overflow\n"); > + > + ret = append_oa_status(stream, buf, count, offset, > + DRM_I915_PERF_RECORD_OA_BUFFER_LOST); > + if (ret) > + return ret; > + > + i915_oa_stream_disable(stream); > + i915_oa_stream_enable(stream); > + } > + > oastatus = I915_READ(GEN8_OASTATUS); > > /* > @@ -835,16 +985,18 @@ static int gen8_oa_read(struct i915_perf_stream *stream, > * that something has gone quite badly wrong. > */ > if (oastatus & GEN8_OASTATUS_OABUFFER_OVERFLOW) { > - ret = append_oa_status(stream, buf, count, offset, > - DRM_I915_PERF_RECORD_OA_BUFFER_LOST); > - if (ret) > - return ret; > - > DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n", > dev_priv->perf.oa.period_exponent); > > - dev_priv->perf.oa.ops.oa_disable(dev_priv); > - dev_priv->perf.oa.ops.oa_enable(dev_priv); > + if (!overflow) { > + ret = append_oa_status(stream, buf, count, offset, > + DRM_I915_PERF_RECORD_OA_BUFFER_LOST); > + if (ret) > + return ret; > + > + i915_oa_stream_disable(stream); > + i915_oa_stream_enable(stream); > + } > > /* > * Note: .oa_enable() is expected to re-init the oabuffer and > @@ -854,10 +1006,12 @@ static int gen8_oa_read(struct i915_perf_stream *stream, > } > > if (oastatus & GEN8_OASTATUS_REPORT_LOST) { > - ret = append_oa_status(stream, buf, count, offset, > - DRM_I915_PERF_RECORD_OA_REPORT_LOST); > - if (ret) > - return ret; > + if (!overflow) { > + ret = append_oa_status(stream, buf, count, offset, > + DRM_I915_PERF_RECORD_OA_REPORT_LOST); > + if (ret) > + return ret; > + } > I915_WRITE(GEN8_OASTATUS, > oastatus & ~GEN8_OASTATUS_REPORT_LOST); > } > @@ -900,6 +1054,8 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, > unsigned int aged_tail_idx; > u32 head, tail; > u32 taken; > + struct perf_sseu_change *sseu_report; > + u32 sseu_head, sseu_tail; > int ret = 0; > > if (WARN_ON(!stream->enabled)) > @@ -937,6 +1093,9 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, > head, tail)) > return -EIO; > > + /* Extract first SSEU report. */ > + sseu_buffer_read_pointers(stream, &sseu_head, &sseu_tail); > + sseu_report = sseu_buffer_peek(dev_priv, sseu_head, sseu_tail); > > for (/* none */; > (taken = OA_TAKEN(tail, head)); > @@ -969,6 +1128,27 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, > continue; > } > > + /* Emit any potential sseu report. */ > + while (sseu_report && > + !timestamp_passed(sseu_report->timestamp, report32[1])) { > + /* > + * While filtering for a single context we avoid > + * leaking the IDs of other contexts. > + */ > + if (stream->ctx && > + stream->ctx->hw_id != sseu_report->change.hw_id) { > + sseu_report->change.hw_id = INVALID_CTX_ID; > + } > + > + ret = append_sseu_change(stream, buf, count, > + offset, sseu_report); > + if (ret) > + break; > + > + sseu_report = sseu_buffer_advance(dev_priv, sseu_head, > + &sseu_tail); > + } > + > ret = append_oa_sample(stream, buf, count, offset, report); > if (ret) > break; > @@ -998,6 +1178,9 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, > spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); > } > > + /* Update tail pointer of the sseu buffer. */ > + sseu_buffer_write_tail_pointer(stream, sseu_tail); > + > return ret; > } > > @@ -1023,12 +1206,29 @@ static int gen7_oa_read(struct i915_perf_stream *stream, > size_t *offset) > { > struct drm_i915_private *dev_priv = stream->dev_priv; > + bool overflow = false; > u32 oastatus1; > int ret; > > if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr)) > return -EIO; > > + spin_lock_irq(&dev_priv->perf.sseu_buffer.ptr_lock); > + overflow = dev_priv->perf.sseu_buffer.overflow; > + spin_unlock_irq(&dev_priv->perf.sseu_buffer.ptr_lock); > + > + if (overflow) { > + DRM_DEBUG("SSEU buffer overflow\n"); > + > + ret = append_oa_status(stream, buf, count, offset, > + DRM_I915_PERF_RECORD_OA_BUFFER_LOST); > + if (ret) > + return ret; > + > + i915_oa_stream_disable(stream); > + i915_oa_stream_enable(stream); > + } > + > oastatus1 = I915_READ(GEN7_OASTATUS1); > > /* XXX: On Haswell we don't have a safe way to clear oastatus1 > @@ -1059,25 +1259,30 @@ static int gen7_oa_read(struct i915_perf_stream *stream, > * now. > */ > if (unlikely(oastatus1 & GEN7_OASTATUS1_OABUFFER_OVERFLOW)) { > - ret = append_oa_status(stream, buf, count, offset, > - DRM_I915_PERF_RECORD_OA_BUFFER_LOST); > - if (ret) > - return ret; > - > DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n", > dev_priv->perf.oa.period_exponent); > > - dev_priv->perf.oa.ops.oa_disable(dev_priv); > - dev_priv->perf.oa.ops.oa_enable(dev_priv); > + if (!overflow) { > + ret = append_oa_status(stream, buf, count, offset, > + DRM_I915_PERF_RECORD_OA_BUFFER_LOST); > + if (ret) > + return ret; > + > + i915_oa_stream_disable(stream); > + i915_oa_stream_enable(stream); > + } > > oastatus1 = I915_READ(GEN7_OASTATUS1); > } > > if (unlikely(oastatus1 & GEN7_OASTATUS1_REPORT_LOST)) { > - ret = append_oa_status(stream, buf, count, offset, > - DRM_I915_PERF_RECORD_OA_REPORT_LOST); > - if (ret) > - return ret; > + if (!overflow) { > + ret = append_oa_status(stream, buf, count, offset, > + DRM_I915_PERF_RECORD_OA_REPORT_LOST); > + if (ret) > + return ret; > + } > + > dev_priv->perf.oa.gen7_latched_oastatus1 |= > GEN7_OASTATUS1_REPORT_LOST; > } > @@ -1227,18 +1432,21 @@ static void oa_put_render_ctx_id(struct i915_perf_stream *stream) > } > > static void > -free_oa_buffer(struct drm_i915_private *i915) > +free_sseu_buffer(struct drm_i915_private *i915) > { > - mutex_lock(&i915->drm.struct_mutex); > + i915_gem_object_unpin_map(i915->perf.sseu_buffer.vma->obj); > + i915_vma_unpin_and_release(&i915->perf.sseu_buffer.vma); > > + i915->perf.sseu_buffer.vaddr = NULL; > +} > + > +static void > +free_oa_buffer(struct drm_i915_private *i915) > +{ > i915_gem_object_unpin_map(i915->perf.oa.oa_buffer.vma->obj); > - i915_vma_unpin(i915->perf.oa.oa_buffer.vma); > - i915_gem_object_put(i915->perf.oa.oa_buffer.vma->obj); > + i915_vma_unpin_and_release(&i915->perf.oa.oa_buffer.vma); > > - i915->perf.oa.oa_buffer.vma = NULL; > i915->perf.oa.oa_buffer.vaddr = NULL; > - > - mutex_unlock(&i915->drm.struct_mutex); > } > > static void i915_oa_stream_destroy(struct i915_perf_stream *stream) > @@ -1255,8 +1463,15 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream) > > dev_priv->perf.oa.ops.disable_metric_set(dev_priv); > > + mutex_lock(&dev_priv->drm.struct_mutex); > + > + if (stream->has_sseu) > + free_sseu_buffer(dev_priv); > + > free_oa_buffer(dev_priv); > > + mutex_unlock(&dev_priv->drm.struct_mutex); > + > intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); > intel_runtime_pm_put(dev_priv); > > @@ -1269,6 +1484,97 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream) > } > } > > +static void init_sseu_buffer(struct drm_i915_private *dev_priv, > + bool need_lock) > +{ > + /* Cleanup auxilliary buffer. */ > + memset(dev_priv->perf.sseu_buffer.vaddr, 0, SSEU_BUFFER_SIZE); > + > + atomic64_inc(&dev_priv->perf.sseu_buffer.enable_no); > + > + if (need_lock) > + spin_lock_irq(&dev_priv->perf.sseu_buffer.ptr_lock); > + > + /* Initialize head & tail pointers. */ > + dev_priv->perf.sseu_buffer.head = 0; > + dev_priv->perf.sseu_buffer.tail = 0; > + dev_priv->perf.sseu_buffer.overflow = false; > + > + if (need_lock) > + spin_unlock_irq(&dev_priv->perf.sseu_buffer.ptr_lock); > +} > + > +static int alloc_sseu_buffer(struct drm_i915_private *dev_priv) > +{ > + struct drm_i915_gem_object *bo; > + struct i915_vma *vma; > + int ret = 0; > + > + bo = i915_gem_object_create(dev_priv, SSEU_BUFFER_SIZE); > + if (IS_ERR(bo)) { > + DRM_ERROR("Failed to allocate SSEU buffer\n"); > + ret = PTR_ERR(bo); > + goto out; > + } > + > + vma = i915_gem_object_ggtt_pin(bo, NULL, 0, SSEU_BUFFER_SIZE, 0); > + if (IS_ERR(vma)) { > + ret = PTR_ERR(vma); > + goto err_unref; > + } > + dev_priv->perf.sseu_buffer.vma = vma; > + > + dev_priv->perf.sseu_buffer.vaddr = > + i915_gem_object_pin_map(bo, I915_MAP_WB); > + if (IS_ERR(dev_priv->perf.sseu_buffer.vaddr)) { > + ret = PTR_ERR(dev_priv->perf.sseu_buffer.vaddr); > + goto err_unpin; > + } > + > + atomic64_set(&dev_priv->perf.sseu_buffer.enable_no, 0); > + > + init_sseu_buffer(dev_priv, true); > + > + DRM_DEBUG_DRIVER("OA SSEU Buffer initialized, gtt offset = 0x%x, vaddr = %p\n", > + i915_ggtt_offset(dev_priv->perf.sseu_buffer.vma), > + dev_priv->perf.sseu_buffer.vaddr); > + > + goto out; > + > +err_unpin: > + __i915_vma_unpin(vma); > + > +err_unref: > + i915_gem_object_put(bo); > + > + dev_priv->perf.sseu_buffer.vaddr = NULL; > + dev_priv->perf.sseu_buffer.vma = NULL; > + > +out: > + return ret; > +} > + > +static void sseu_enable(struct drm_i915_private *dev_priv) > +{ > + spin_lock_irq(&dev_priv->perf.sseu_buffer.ptr_lock); > + > + init_sseu_buffer(dev_priv, false); > + > + dev_priv->perf.sseu_buffer.enabled = true; > + > + spin_unlock_irq(&dev_priv->perf.sseu_buffer.ptr_lock); > +} > + > +static void sseu_disable(struct drm_i915_private *dev_priv) > +{ > + spin_lock_irq(&dev_priv->perf.sseu_buffer.ptr_lock); > + > + dev_priv->perf.sseu_buffer.enabled = false; > + dev_priv->perf.sseu_buffer.overflow = false; > + > + spin_unlock_irq(&dev_priv->perf.sseu_buffer.ptr_lock); > +} > + > static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv) > { > u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma); > @@ -1763,6 +2069,9 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, > struct intel_context *ce = &ctx->engine[RCS]; > u32 *regs; > > + memset(&ctx->perf_sseu, 0, sizeof(ctx->perf_sseu)); > + ctx->perf_enable_no = 0; > + > /* OA settings will be set upon first use */ > if (!ce->state) > continue; > @@ -1927,6 +2236,9 @@ static void i915_oa_stream_enable(struct i915_perf_stream *stream) > { > struct drm_i915_private *dev_priv = stream->dev_priv; > > + if (stream->has_sseu) > + sseu_enable(dev_priv); > + > dev_priv->perf.oa.ops.oa_enable(dev_priv); > > if (dev_priv->perf.oa.periodic) > @@ -1957,6 +2269,9 @@ static void i915_oa_stream_disable(struct i915_perf_stream *stream) > { > struct drm_i915_private *dev_priv = stream->dev_priv; > > + if (stream->has_sseu) > + sseu_disable(dev_priv); > + > dev_priv->perf.oa.ops.oa_disable(dev_priv); > > if (dev_priv->perf.oa.periodic) > @@ -2087,6 +2402,14 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, > atomic_inc(&dev_priv->perf.oa.noa_restore); > } > > + if (props->sseu_enable) { > + ret = alloc_sseu_buffer(dev_priv); > + if (ret) > + goto err_sseu_buf_alloc; > + > + stream->has_sseu = true; > + } > + > ret = alloc_oa_buffer(dev_priv); > if (ret) > goto err_oa_buf_alloc; > @@ -2119,6 +2442,9 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, > err_enable: > intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); > intel_runtime_pm_put(dev_priv); > + free_sseu_buffer(dev_priv); > + > +err_sseu_buf_alloc: > free_oa_buffer(dev_priv); > > err_oa_buf_alloc: This looks bogus, it needs to be: err_enable: free_oa_buffer() err_oa_buf_alloc: if (stream->has_sseu) free_sseu_buffer() err_sseu_buf_alloc: > @@ -2212,6 +2538,69 @@ int i915_oa_emit_noa_config_locked(struct drm_i915_gem_request *req) > } > > /** > + * i915_perf_emit_sseu_config - emit the SSEU configuration of a gem request > + * @req: the request > + * > + * If the OA unit is enabled, the write the SSEU configuration of a > + * given request into a circular buffer. The buffer is read at the > + * same time as the OA buffer and its elements are inserted into the > + * perf stream to notify the userspace of SSEU configuration changes. > + * This is needed to interpret values from the OA reports. > + */ > +void i915_perf_emit_sseu_config(struct drm_i915_gem_request *req) > +{ > + struct drm_i915_private *dev_priv = req->i915; > + struct perf_sseu_change *report; > + const struct sseu_dev_info *sseu; > + u32 head, timestamp; > + u64 perf_enable_no; > + > + /* Perf not supported. */ > + if (!dev_priv->perf.initialized) > + return; > + > + /* Nothing's changed, no need to signal userspace. */ > + sseu = &req->ctx->engine[req->engine->id].sseu; > + perf_enable_no = atomic64_read(&dev_priv->perf.sseu_buffer.enable_no); > + if (perf_enable_no == req->ctx->perf_enable_no && > + memcmp(&req->ctx->perf_sseu, sseu, sizeof(*sseu)) == 0) > + return; > + > + memcpy(&req->ctx->perf_sseu, sseu, sizeof(*sseu)); > + req->ctx->perf_enable_no = perf_enable_no; > + > + /* Read the timestamp outside the spin lock. */ > + timestamp = I915_READ(RING_TIMESTAMP(RENDER_RING_BASE)); Except we then proceed to read it again inside the spinlock :) > + > + spin_lock_irq(&dev_priv->perf.sseu_buffer.ptr_lock); > + > + if (!dev_priv->perf.sseu_buffer.enabled) > + goto out; > + > + head = dev_priv->perf.sseu_buffer.head; > + dev_priv->perf.sseu_buffer.head = > + (dev_priv->perf.sseu_buffer.head + 1) % SSEU_BUFFER_NB_ITEM; > + > + /* This should never happen, unless we've wrongly dimensioned the SSEU > + * buffer. */ > + if (dev_priv->perf.sseu_buffer.head == dev_priv->perf.sseu_buffer.tail) { > + dev_priv->perf.sseu_buffer.overflow = true; > + goto out; > + } What is the intention here if we do overflow, we seem to bail initially, but if we re-enter and we are still in an overflow state we will continue writing out reports anyway? > + > + report = dev_priv->perf.sseu_buffer.vaddr + head * SSEU_BUFFER_ITEM_SIZE; > + memset(report, 0, sizeof(*report)); > + > + report->timestamp = I915_READ(RING_TIMESTAMP(RENDER_RING_BASE)); > + report->change.hw_id = req->ctx->hw_id; > + report->change.slice_mask = sseu->slice_mask; > + report->change.subslice_mask = sseu->subslice_mask; > + > +out: > + spin_unlock_irq(&dev_priv->perf.sseu_buffer.ptr_lock); > +} > + > +/** > * i915_perf_read_locked - &i915_perf_stream_ops->read with error normalisation > * @stream: An i915 perf stream > * @file: An i915 perf stream file > @@ -2867,6 +3256,9 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, > case DRM_I915_PERF_PROP_NOA_RESTORE: > props->noa_restore = true; > break; > + case DRM_I915_PERF_PROP_SSEU_CHANGE: > + props->sseu_enable = true; > + break; > case DRM_I915_PERF_PROP_MAX: > MISSING_CASE(id); > return -EINVAL; > @@ -3229,6 +3621,10 @@ void i915_perf_init(struct drm_i915_private *dev_priv) > > atomic_set(&dev_priv->perf.oa.noa_restore, 0); > > + spin_lock_init(&dev_priv->perf.sseu_buffer.ptr_lock); > + dev_priv->perf.sseu_buffer.enabled = false; > + atomic64_set(&dev_priv->perf.sseu_buffer.enable_no, 0); > + > oa_sample_rate_hard_limit = > dev_priv->perf.oa.timestamp_frequency / 2; > dev_priv->perf.sysctl_header = register_sysctl_table(dev_root); > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > index ad37e2b236b0..e2f83a12337b 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -350,8 +350,10 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) > rq = port_unpack(&port[n], &count); > if (rq) { > GEM_BUG_ON(count > !n); > - if (!count++) > + if (!count++) { > + i915_perf_emit_sseu_config(rq); > execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); > + } > port_set(&port[n], port_pack(rq, count)); > desc = execlists_update_context(rq); > GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc)); > @@ -1406,6 +1408,9 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, > req->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(req->engine); > } > > + /* Emit NOA config */ > + i915_oa_emit_noa_config_locked(req); > + > cs = intel_ring_begin(req, 4); > if (IS_ERR(cs)) > return PTR_ERR(cs); > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > index 27cc72a85d07..c82f7473e3a0 100644 > --- a/include/uapi/drm/i915_drm.h > +++ b/include/uapi/drm/i915_drm.h > @@ -447,6 +447,16 @@ typedef struct drm_i915_setparam { > int value; > } drm_i915_setparam_t; > > +union drm_i915_gem_param_sseu { > + struct { > + __u8 slice_mask; > + __u8 subslice_mask; > + __u8 min_eu_per_subslice; > + __u8 max_eu_per_subslice; > + } packed; > + __u64 value; > +}; Please kill? > + > /* A memory manager for regions of shared memory: > */ > #define I915_MEM_REGION_AGP 1 > @@ -1376,6 +1386,13 @@ enum drm_i915_perf_property_id { > */ > DRM_I915_PERF_PROP_NOA_RESTORE, > > + /** > + * Specifying this property will lead the perf driver to insert sseu > + * change reports (see @DRM_I915_PERF_RECORD_SSEU_CHANGE) in the > + * stream of reports. > + */ > + DRM_I915_PERF_PROP_SSEU_CHANGE, > + > DRM_I915_PERF_PROP_MAX /* non-ABI */ > }; > > @@ -1459,9 +1476,27 @@ enum drm_i915_perf_record_type { > */ > DRM_I915_PERF_RECORD_OA_BUFFER_LOST = 3, > > + /** > + * A change in the sseu configuration happened for a particular > + * context. > + * > + * struct { > + * struct drm_i915_perf_record_header header; > + * { struct drm_i915_perf_sseu_change change; } && DRM_I915_PERF_PROP_SSEU_CHANGE > + * }; > + */ > + DRM_I915_PERF_RECORD_SSEU_CHANGE = 4, > + > DRM_I915_PERF_RECORD_MAX /* non-ABI */ > }; > > +struct drm_i915_perf_sseu_change { > + __u32 hw_id; > + __u16 pad; > + __u8 slice_mask; > + __u8 subslice_mask; > +}; > + > #if defined(__cplusplus) > } > #endif > -- > 2.11.0 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > https://lists.freedesktop.org/mailman/listinfo/intel-gfx _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx