On Fri, Feb 15, 2013 at 04:12:16PM +0200, Mika Kuoppala wrote: > Ville Syrj?l? <ville.syrjala at linux.intel.com> writes: > > > On Mon, Feb 04, 2013 at 04:04:43PM +0200, Mika Kuoppala wrote: > >> After hang check timer has declared gpu to be hang, > >> rings are reset. In ring reset, when clearing > >> request list, do post mortem analysis to find out > >> the guilty batch buffer. > >> > >> Select requests for further analysis by inspecting > >> the completed sequence number which has been updated > >> into the HWS page. If request was completed, it can't > >> be related to the hang. > >> > >> For completed requests mark the batch as guilty > > ^^^^^^^^^ > > > > That's a typo, right? > > It sure is. Will fix. > > >> if the ring was not waiting and the ring head was > >> stuck inside the buffer object or in the flush region > >> right after the batch. For everything else, mark > >> them as innocents. > >> > >> Signed-off-by: Mika Kuoppala <mika.kuoppala at intel.com> > >> --- > >> drivers/gpu/drm/i915/i915_gem.c | 91 +++++++++++++++++++++++++++++++++++++++ > >> 1 file changed, 91 insertions(+) > >> > >> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > >> index b304b06..db0f3e3 100644 > >> --- a/drivers/gpu/drm/i915/i915_gem.c > >> +++ b/drivers/gpu/drm/i915/i915_gem.c > >> @@ -2092,9 +2092,97 @@ i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) > >> spin_unlock(&file_priv->mm.lock); > >> } > >> > >> +static bool i915_head_inside_object(u32 acthd, struct drm_i915_gem_object *obj) > >> +{ > >> + if (acthd >= obj->gtt_offset && > >> + acthd < obj->gtt_offset + obj->base.size) > >> + return true; > >> + > >> + return false; > >> +} > >> + > >> +static bool i915_head_inside_request(u32 acthd, u32 rs, u32 re) > >> +{ > >> + if (rs < re) { > >> + if (acthd >= rs && acthd < re) > >> + return true; > >> + } else if (rs > re) { > >> + if (acthd >= rs || acthd < re) > >> + return true; > >> + } > >> + > >> + return false; > >> +} > >> + > >> +static bool i915_request_guilty(struct drm_i915_gem_request *request, > >> + const u32 acthd, bool *inside) > >> +{ > >> + if (request->batch_obj) { > >> + if (i915_head_inside_object(acthd, request->batch_obj)) { > >> + *inside = true; > >> + return true; > >> + } > >> + } > >> + > >> + if (i915_head_inside_request(acthd, request->head, request->tail)) { > >> + *inside = false; > >> + return true; > >> + } > >> + > >> + return false; > >> +} > >> + > >> +static void i915_set_reset_status(struct intel_ring_buffer *ring, > >> + struct drm_i915_gem_request *request, > >> + u32 acthd) > >> +{ > >> + bool inside; > >> + struct i915_reset_stats *rs = NULL; > >> + bool guilty; > >> + > >> + /* Innocent until proven guilty */ > >> + guilty = false; > >> + > >> + if (!ring->hangcheck_waiting && > >> + i915_request_guilty(request, acthd, &inside)) { > >> + DRM_ERROR("%s hung %s bo (0x%x ctx %d) at 0x%x\n", > >> + ring->name, > >> + inside ? "inside" : "flushing", > >> + request->batch_obj ? > >> + request->batch_obj->gtt_offset : 0, > >> + request->ctx ? request->ctx->id : 0, > >> + acthd); > >> + > >> + guilty = true; > >> + } > >> + > >> + /* If contexts are disabled or this is the default context, use > >> + * file_priv->reset_stats > >> + */ > >> + if (request->ctx && request->ctx->id != DEFAULT_CONTEXT_ID) > >> + rs = &request->ctx->reset_stats; > >> + else if (request->file_priv) > >> + rs = &request->file_priv->reset_stats; > >> + > >> + if (rs) { > >> + rs->total++; > >> + > >> + if (guilty) > >> + rs->guilty++; > >> + else > >> + rs->innocent++; > >> + } > >> +} > >> + > >> static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv, > >> struct intel_ring_buffer *ring) > >> { > >> + u32 completed_seqno; > >> + u32 acthd; > >> + > >> + acthd = intel_ring_get_active_head(ring); > >> + completed_seqno = ring->get_seqno(ring, false); > >> + > >> while (!list_empty(&ring->request_list)) { > >> struct drm_i915_gem_request *request; > >> > >> @@ -2102,6 +2190,9 @@ static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv, > >> struct drm_i915_gem_request, > >> list); > >> > >> + if (request->seqno > completed_seqno) > > > > i915_seqno_passed()? > > For readability or for correctness? > > When seqno wraps, the request queue will be cleaned up so > we can't have cross wrap boundary stuff in here. > > Or did you have something else in mind that i have missed. Nah. It just seems suspicious to have a direct comparison with any comment why i915_seqno_passed() isn't used. -- Ville Syrj?l? Intel OTC