On Mon, 15 Oct 2018 at 17:00, Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx> wrote: > > The way our hardware is designed doesn't seem to let us use the > MI_RECORD_PERF_COUNT command without setting up a circular buffer. > > In the case where the user didn't request OA reports to be available > through the i915 perf stream, we can set the OA buffer to the minimum > size to avoid consuming memory which won't be used by the driver. > > v2: Simplify oa buffer size exponent selection (Chris) > Reuse vma size field (Lionel) > > v3: Restrict size opening parameter to values supported by HW (Chris) > > Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_drv.h | 1 + > drivers/gpu/drm/i915/i915_perf.c | 92 ++++++++++++++++++++++---------- > drivers/gpu/drm/i915/i915_reg.h | 2 + > include/uapi/drm/i915_drm.h | 7 +++ > 4 files changed, 74 insertions(+), 28 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 65eaac2d7e3c..f12770bd4858 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -2053,6 +2053,7 @@ struct drm_i915_private { > u32 last_ctx_id; > int format; > int format_size; > + int size_exponent; > > /** > * Locks reads and writes to all head/tail state > diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c > index 88f3f9b6a353..ff90ccebe1c1 100644 > --- a/drivers/gpu/drm/i915/i915_perf.c > +++ b/drivers/gpu/drm/i915/i915_perf.c > @@ -212,13 +212,7 @@ > #include "i915_oa_icl.h" > #include "intel_lrc_reg.h" > > -/* HW requires this to be a power of two, between 128k and 16M, though driver > - * is currently generally designed assuming the largest 16M size is used such > - * that the overflow cases are unlikely in normal operation. > - */ > -#define OA_BUFFER_SIZE SZ_16M > - > -#define OA_TAKEN(tail, head) ((tail - head) & (OA_BUFFER_SIZE - 1)) > +#define OA_TAKEN(tail, head) ((tail - head) & (dev_priv->perf.oa.oa_buffer.vma->size - 1)) > > /** > * DOC: OA Tail Pointer Race > @@ -361,6 +355,7 @@ struct perf_open_properties { > int oa_format; > bool oa_periodic; > int oa_period_exponent; > + u32 oa_buffer_size_exponent; > }; > > static void free_oa_config(struct drm_i915_private *dev_priv, > @@ -523,7 +518,7 @@ static bool oa_buffer_check_unlocked(struct drm_i915_private *dev_priv) > * could put the tail out of bounds... > */ > if (hw_tail >= gtt_offset && > - hw_tail < (gtt_offset + OA_BUFFER_SIZE)) { > + hw_tail < (gtt_offset + dev_priv->perf.oa.oa_buffer.vma->size)) { > dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset = > aging_tail = hw_tail; > dev_priv->perf.oa.oa_buffer.aging_timestamp = now; > @@ -652,7 +647,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, > int report_size = dev_priv->perf.oa.oa_buffer.format_size; > u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr; > u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma); > - u32 mask = (OA_BUFFER_SIZE - 1); > + u32 mask = (dev_priv->perf.oa.oa_buffer.vma->size - 1); > size_t start_offset = *offset; > unsigned long flags; > unsigned int aged_tail_idx; > @@ -692,8 +687,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, > * only be incremented by multiples of the report size (notably also > * all a power of two). > */ > - if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size || > - tail > OA_BUFFER_SIZE || tail % report_size, > + if (WARN_ONCE(head > dev_priv->perf.oa.oa_buffer.vma->size || head % report_size || > + tail > dev_priv->perf.oa.oa_buffer.vma->size || tail % report_size, > "Inconsistent OA buffer pointers: head = %u, tail = %u\n", > head, tail)) > return -EIO; > @@ -716,7 +711,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, > * here would imply a driver bug that would result > * in an overrun. > */ > - if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) { > + if (WARN_ON((dev_priv->perf.oa.oa_buffer.vma->size - head) < report_size)) { > DRM_ERROR("Spurious OA head ptr: non-integral report offset\n"); > break; > } > @@ -941,7 +936,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, > int report_size = dev_priv->perf.oa.oa_buffer.format_size; > u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr; > u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma); > - u32 mask = (OA_BUFFER_SIZE - 1); > + u32 mask = (dev_priv->perf.oa.oa_buffer.vma->size - 1); > size_t start_offset = *offset; > unsigned long flags; > unsigned int aged_tail_idx; > @@ -978,8 +973,8 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, > * only be incremented by multiples of the report size (notably also > * all a power of two). > */ > - if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size || > - tail > OA_BUFFER_SIZE || tail % report_size, > + if (WARN_ONCE(head > dev_priv->perf.oa.oa_buffer.vma->size || head % report_size || > + tail > dev_priv->perf.oa.oa_buffer.vma->size || tail % report_size, > "Inconsistent OA buffer pointers: head = %u, tail = %u\n", > head, tail)) > return -EIO; > @@ -999,7 +994,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, > * here would imply a driver bug that would result > * in an overrun. > */ > - if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) { > + if (WARN_ON((dev_priv->perf.oa.oa_buffer.vma->size - head) < report_size)) { > DRM_ERROR("Spurious OA head ptr: non-integral report offset\n"); > break; > } > @@ -1396,7 +1391,9 @@ static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv) > > I915_WRITE(GEN7_OABUFFER, gtt_offset); > > - I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */ > + I915_WRITE(GEN7_OASTATUS1, gtt_offset | > + ((dev_priv->perf.oa.oa_buffer.size_exponent - 17) << > + GEN7_OASTATUS1_BUFFER_SIZE_SHIFT)); /* tail */ > > /* Mark that we need updated tail pointers to read from... */ > dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR; > @@ -1421,7 +1418,8 @@ static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv) > * the assumption that new reports are being written to zeroed > * memory... > */ > - memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE); > + memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, > + dev_priv->perf.oa.oa_buffer.vma->size); > > /* Maybe make ->pollin per-stream state if we support multiple > * concurrent streams in the future. > @@ -1451,7 +1449,9 @@ static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv) > * bit." > */ > I915_WRITE(GEN8_OABUFFER, gtt_offset | > - OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT); > + ((dev_priv->perf.oa.oa_buffer.size_exponent - 17) << > + GEN8_OABUFFER_BUFFER_SIZE_SHIFT) | > + GEN8_OABUFFER_MEM_SELECT_GGTT); > I915_WRITE(GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK); > > /* Mark that we need updated tail pointers to read from... */ > @@ -1479,7 +1479,8 @@ static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv) > * the assumption that new reports are being written to zeroed > * memory... > */ > - memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE); > + memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, > + dev_priv->perf.oa.oa_buffer.vma->size); > > /* > * Maybe make ->pollin per-stream state if we support multiple > @@ -1488,23 +1489,24 @@ static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv) > dev_priv->perf.oa.pollin = false; > } > > -static int alloc_oa_buffer(struct drm_i915_private *dev_priv) > +static int alloc_oa_buffer(struct drm_i915_private *dev_priv, int size_exponent) > { > struct drm_i915_gem_object *bo; > struct i915_vma *vma; > + size_t size = 1U << size_exponent; > int ret; > > if (WARN_ON(dev_priv->perf.oa.oa_buffer.vma)) > return -ENODEV; > > + if (WARN_ON(size < SZ_128K || size > SZ_16M)) > + return -EINVAL; > + > ret = i915_mutex_lock_interruptible(&dev_priv->drm); > if (ret) > return ret; > > - BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE); > - BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M); > - > - bo = i915_gem_object_create(dev_priv, OA_BUFFER_SIZE); > + bo = i915_gem_object_create(dev_priv, size); > if (IS_ERR(bo)) { > DRM_ERROR("Failed to allocate OA buffer\n"); > ret = PTR_ERR(bo); > @@ -1522,6 +1524,7 @@ static int alloc_oa_buffer(struct drm_i915_private *dev_priv) > goto err_unref; > } > dev_priv->perf.oa.oa_buffer.vma = vma; > + dev_priv->perf.oa.oa_buffer.size_exponent = size_exponent; > > dev_priv->perf.oa.oa_buffer.vaddr = > i915_gem_object_pin_map(bo, I915_MAP_WB); > @@ -1530,9 +1533,10 @@ static int alloc_oa_buffer(struct drm_i915_private *dev_priv) > goto err_unpin; > } > > - DRM_DEBUG_DRIVER("OA Buffer initialized, gtt offset = 0x%x, vaddr = %p\n", > + DRM_DEBUG_DRIVER("OA Buffer initialized, gtt offset = 0x%x, vaddr = %p, size = %llu\n", > i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma), > - dev_priv->perf.oa.oa_buffer.vaddr); > + dev_priv->perf.oa.oa_buffer.vaddr, > + dev_priv->perf.oa.oa_buffer.vma->size); > > goto unlock; > > @@ -2092,7 +2096,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, > intel_runtime_pm_get(dev_priv); > intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); > > - ret = alloc_oa_buffer(dev_priv); > + ret = alloc_oa_buffer(dev_priv, props->oa_buffer_size_exponent); > if (ret) > goto err_oa_buf_alloc; > > @@ -2651,6 +2655,26 @@ static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent) > 1000ULL * INTEL_INFO(dev_priv)->cs_timestamp_frequency_khz); > } > > +static int > +select_oa_buffer_exponent(struct drm_i915_private *i915, > + u64 requested_size) > +{ > + int order; > + > + /* > + * When no size is specified, use the largest size supported by all > + * generations. > + */ > + if (!requested_size) > + return order_base_2(SZ_16M); > + > + order = order_base_2(clamp_t(u64, requested_size, SZ_128K, SZ_16M)); > + if (requested_size != (1UL << order)) > + return -EINVAL; > + > + return order; > +} > + > /** > * read_properties_unlocked - validate + copy userspace stream open properties > * @dev_priv: i915 device instance > @@ -2778,6 +2802,12 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, > props->oa_periodic = true; > props->oa_period_exponent = value; > break; > + case DRM_I915_PERF_PROP_OA_BUFFER_SIZE: > + ret = select_oa_buffer_exponent(dev_priv, value); > + if (ret < 0) DRM_DEBUG("OA buffer size invalid %llu\n", value); There's a now out of date comment in gen8_oa_read about not giving control over the OA buffer size to userspace :) Reviewed-by: Matthew Auld <matthew.auld@xxxxxxxxx> _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx