On Wed, Jul 15, 2015 at 02:17:01PM +0530, sourab.gupta@xxxxxxxxx wrote: > +void i915_oa_insert_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id) You need to pass in the request here instead. A better name would be i915_oa_emit_perf_report(). insert_cmd() is a little too generic (i.e. which cmd do you mean?). > +{ > + struct intel_engine_cs *ring = ringbuf->ring; > + struct drm_i915_private *dev_priv = ring->dev->dev_private; > + struct drm_i915_gem_object *obj = dev_priv->oa_pmu.oa_rcs_buffer.obj; > + struct i915_oa_rcs_node *entry; > + unsigned long lock_flags; > + u32 addr = 0; > + int ret; > + > + /* OA counters are only supported on the render ring */ > + if (ring->id != RCS) > + return; > + > + entry = kzalloc(sizeof(*entry), GFP_KERNEL); > + if (entry == NULL) { > + DRM_ERROR("alloc failed\n"); > + return; > + } > + entry->ctx_id = ctx_id; > + i915_gem_request_assign(&entry->req, ring->outstanding_lazy_request); > + > + spin_lock_irqsave(&dev_priv->oa_pmu.lock, lock_flags); > + if (list_empty(&dev_priv->oa_pmu.node_list)) > + entry->offset = 0; > + else { > + struct i915_oa_rcs_node *last_entry; > + int max_offset = dev_priv->oa_pmu.oa_rcs_buffer.node_count * > + dev_priv->oa_pmu.oa_rcs_buffer.node_size; > + > + last_entry = list_last_entry(&dev_priv->oa_pmu.node_list, > + struct i915_oa_rcs_node, head); > + entry->offset = last_entry->offset + > + dev_priv->oa_pmu.oa_rcs_buffer.node_size; > + > + if (entry->offset > max_offset) > + entry->offset = 0; > + } > + list_add_tail(&entry->head, &dev_priv->oa_pmu.node_list); > + spin_unlock_irqrestore(&dev_priv->oa_pmu.lock, lock_flags); > + > + addr = i915_gem_obj_ggtt_offset(obj) + entry->offset; Don't do more than one i915_gem_obj_to_ggtt() please (preferably none and just keep hold of your pinned vma from the start). 
> + /* addr should be 64 byte aligned */ > + BUG_ON(addr & 0x3f); > + > + ret = intel_ring_begin(ring, 4); > + if (ret) > + return; You've committed the request to the sample list, but have just erred out. > + > + intel_ring_emit(ring, MI_REPORT_PERF_COUNT | (1<<0)); > + intel_ring_emit(ring, addr | MI_REPORT_PERF_COUNT_GGTT); > + intel_ring_emit(ring, ring->outstanding_lazy_request->seqno); > + intel_ring_emit(ring, MI_NOOP); > + intel_ring_advance(ring); > + > + obj->base.write_domain = I915_GEM_DOMAIN_RENDER; > + i915_vma_move_to_active(i915_gem_obj_to_ggtt(obj), ring); That's the magic I have been looking for. -Chris -- Chris Wilson, Intel Open Source Technology Centre _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx