Quoting Lionel Landwerlin (2019-06-27 09:00:44) > We haven't run into issues with writing the global OA/NOA > registers configuration from CPU so far, but HW engineers actually > recommend doing this from the command streamer. > > Since we have a command buffer prepared for the execbuffer side of > things, we can reuse that approach here too. > > Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_perf.c | 203 +++++++++++++++---------------- > 1 file changed, 100 insertions(+), 103 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c > index bf4f5fee6764..7e636463e1f5 100644 > --- a/drivers/gpu/drm/i915/i915_perf.c > +++ b/drivers/gpu/drm/i915/i915_perf.c > @@ -389,6 +389,19 @@ void i915_oa_config_release(struct kref *ref) > kfree(oa_config); > } > > +static void i915_oa_config_dispose_buffers(struct drm_i915_private *i915) > +{ > + struct i915_oa_config *oa_config, *next; > + > + mutex_lock(&i915->perf.metrics_lock); > + list_for_each_entry_safe(oa_config, next, &i915->perf.metrics_buffers, vma_link) { > + list_del(&oa_config->vma_link); > + i915_gem_object_put(oa_config->obj); > + oa_config->obj = NULL; > + } > + mutex_unlock(&i915->perf.metrics_lock); > +} > + > static u32 *write_cs_mi_lri(u32 *cs, const struct i915_oa_reg *reg_data, u32 n_regs) > { > u32 i; > @@ -1813,67 +1826,86 @@ static int alloc_noa_wait(struct drm_i915_private *i915) > return ret; > } > > -static void config_oa_regs(struct drm_i915_private *dev_priv, > - const struct i915_oa_reg *regs, > - u32 n_regs) > +static int config_oa_regs(struct drm_i915_private *i915, > + struct i915_oa_config *oa_config) > { > - u32 i; > + struct i915_request *rq; > + struct i915_vma *vma; > + long timeout; > + u32 *cs; > + int err; > > - for (i = 0; i < n_regs; i++) { > - const struct i915_oa_reg *reg = regs + i; > + rq = i915_request_create(i915->engine[RCS0]->kernel_context); > + if (IS_ERR(rq)) > + return 
PTR_ERR(rq); > > - I915_WRITE(reg->addr, reg->value); > + err = i915_active_request_set(&i915->engine[RCS0]->last_oa_config, > + rq); This will need an explicit mutex as it is not serialised by any single timeline. i915->perf.metrics_lock viable? Needs to be taken here and in execbuf. > + if (err) { > + i915_request_add(rq); > + return err; > + } > + > + vma = i915_vma_instance(oa_config->obj, &i915->ggtt.vm, NULL); > + if (unlikely(IS_ERR(vma))) { > + i915_request_add(rq); > + return PTR_ERR(vma); > + } > + > + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); > + if (err) { > + i915_request_add(rq); > + return err; > } This time there is no excuse for not pinning outside of the timeline->mutex. > + > + err = i915_vma_move_to_active(vma, rq, 0); > + if (err) { > + i915_vma_unpin(vma); > + i915_request_add(rq); > + return err; > + } > + > + cs = intel_ring_begin(rq, INTEL_GEN(i915) >= 8 ? 4 : 2); > + if (IS_ERR(cs)) { > + i915_vma_unpin(vma); > + i915_request_add(rq); > + return PTR_ERR(cs); > + } > + > + if (INTEL_GEN(i915) > 8) { > + *cs++ = MI_BATCH_BUFFER_START_GEN8; > + *cs++ = lower_32_bits(vma->node.start); > + *cs++ = upper_32_bits(vma->node.start); > + *cs++ = MI_NOOP; > + } else { > + *cs++ = MI_BATCH_BUFFER_START; > + *cs++ = vma->node.start; > + } > + > + intel_ring_advance(rq, cs); > + > + i915_vma_unpin(vma); > + > + i915_request_add(rq); > + > + i915_request_get(rq); Strictly, before the add, as the add is the xfer of the ref. > + timeout = i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED, > + MAX_SCHEDULE_TIMEOUT); > + i915_request_put(rq); > + > + return timeout < 0 ? err : 0; Report err, but timeline carries the errno? -Chris _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx