In order to track down the batch buffer and context which caused the ring to hang, store a reference to the batch buffer object (bo) in the request struct. A request can also cause the GPU to hang after the batch, in the flush section of the ring. To detect this, add the ring offset of the start of the flush portion to the request. Signed-off-by: Mika Kuoppala <mika.kuoppala at intel.com> --- drivers/gpu/drm/i915/i915_drv.h | 11 +++++++++-- drivers/gpu/drm/i915/i915_gem.c | 6 +++++- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 7 ++++--- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 22dcff6..8bc399c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1352,12 +1352,18 @@ struct drm_i915_gem_request { /** GEM sequence number associated with this request. */ uint32_t seqno; - /** Postion in the ringbuffer of the end of the request */ + /** Position in the ringbuffer of the start of the request */ + u32 head; + + /** Position in the ringbuffer of the end of the request */ u32 tail; /** Context related to this request */ struct i915_hw_context *ctx; + /** Batch buffer related to this request if any */ + struct drm_i915_gem_object *batch_obj; + /** Time at which this request was emitted, in jiffies. 
*/ unsigned long emitted_jiffies; @@ -1747,9 +1753,10 @@ int __must_check i915_gpu_idle(struct drm_device *dev); int __must_check i915_gem_idle(struct drm_device *dev); int __i915_add_request(struct intel_ring_buffer *ring, struct drm_file *file, + struct drm_i915_gem_object *batch_obj, u32 *seqno); #define i915_add_request(ring, seqno) \ - __i915_add_request(ring, NULL, seqno); + __i915_add_request(ring, NULL, NULL, seqno); int __must_check i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno); int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 38e2087..5be7846 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2002,14 +2002,16 @@ i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) int __i915_add_request(struct intel_ring_buffer *ring, struct drm_file *file, + struct drm_i915_gem_object *obj, u32 *out_seqno) { drm_i915_private_t *dev_priv = ring->dev->dev_private; struct drm_i915_gem_request *request; - u32 request_ring_position; + u32 request_ring_position, request_start; int was_empty; int ret; + request_start = intel_ring_get_tail(ring); /* * Emit any outstanding flushes - execbuf can fail to emit the flush * after having emitted the batchbuffer command. 
Hence we need to fix @@ -2041,8 +2043,10 @@ int __i915_add_request(struct intel_ring_buffer *ring, request->seqno = intel_ring_get_seqno(ring); request->ring = ring; + request->head = request_start; request->tail = request_ring_position; request->ctx = ring->last_context; + request->batch_obj = obj; if (request->ctx) i915_gem_context_reference(request->ctx); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index d79ac7a..87a3227 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -796,13 +796,14 @@ i915_gem_execbuffer_move_to_active(struct list_head *objects, static void i915_gem_execbuffer_retire_commands(struct drm_device *dev, struct drm_file *file, - struct intel_ring_buffer *ring) + struct intel_ring_buffer *ring, + struct drm_i915_gem_object *obj) { /* Unconditionally force add_request to emit a full flush. */ ring->gpu_caches_dirty = true; /* Add a breadcrumb for the completion of the batch buffer */ - (void)__i915_add_request(ring, file, NULL); + (void)__i915_add_request(ring, file, obj, NULL); } static int @@ -1083,7 +1084,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags); i915_gem_execbuffer_move_to_active(&eb->objects, ring); - i915_gem_execbuffer_retire_commands(dev, file, ring); + i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj); err: eb_destroy(eb); -- 1.7.9.5