Style nits:

Quoting Daniel Vetter (2017-10-09 17:44:01)
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 82a10036fb38..8d7d8d1f78db 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3057,49 +3057,71 @@ static void nop_submit_request(struct drm_i915_gem_request *request)
>
>  	spin_lock_irqsave(&request->engine->timeline->lock, flags);
>  	__i915_gem_request_submit(request);
> -	intel_engine_init_global_seqno(request->engine, request->global_seqno);
>  	spin_unlock_irqrestore(&request->engine->timeline->lock, flags);

This reduced to i915_gem_request_submit(). Hah, you can just
engine->submit_request = i915_gem_request_submit, and keep the
nop_submit_request for the second phase! That may make the diff neater?
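For the two callback swaps that would be something like (untested sketch
on top of your patch; the rest of i915_gem_set_wedged() and the -EIO
marking stay as you have them):

	/*
	 * Phase 1: stop feeding the hardware. i915_gem_request_submit()
	 * is exactly the lock + __i915_gem_request_submit() + unlock
	 * that the first nop reduced to, so plug it in directly.
	 */
	for_each_engine(engine, i915, id)
		engine->submit_request = i915_gem_request_submit;

	synchronize_rcu();

	for_each_engine(engine, i915, id) {
		/* Mark all executing requests as skipped */
		engine->cancel_requests(engine);

		/*
		 * Phase 2: nop_submit_request keeps its name, but is now
		 * the callback that also completes each incoming request.
		 */
		engine->submit_request = nop_submit_request;
	}

That leaves just the one nop variant in the final code.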
> -static int __i915_gem_set_wedged_BKL(void *data)
> +void i915_gem_set_wedged(struct drm_i915_private *i915)
>  {
> -	struct drm_i915_private *i915 = data;
>  	struct intel_engine_cs *engine;
>  	enum intel_engine_id id;
>
> +	/* First, stop submission to hw, but do not yet complete requests by
> +	 * rolling the global seqno forward (since this would complete requests
> +	 * for which we haven't set the fence error to EIO yet).
> +	 */

I'm doing a quiet fixup of all my comments to follow

/*
 * The core convention, which you normally use anyway.
 */

>  	for_each_engine(engine, i915, id)
> -		engine_set_wedged(engine);
> +		engine->submit_request = nop_submit_request;
>
> -	set_bit(I915_WEDGED, &i915->gpu_error.flags);
> -	wake_up_all(&i915->gpu_error.reset_queue);
> +	/* Make sure no one is running the old callback before we proceed with
> +	 * cancelling requests and resetting the completion tracking. Otherwise
> +	 * we might submit a request to the hardware which never completes.
> +	 */
> +	synchronize_rcu();
>
> -	return 0;
> -}
> +	for_each_engine(engine, i915, id) {
> +		/* Mark all executing requests as skipped */
> +		engine->cancel_requests(engine);
>
> -void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
> -{
> -	stop_machine(__i915_gem_set_wedged_BKL, dev_priv, NULL);
> +		/* Only once we've force-cancelled all in-flight requests can we
> +		 * start to complete all requests.
> +		 */
> +		engine->submit_request = nop_complete_submit_request;
> +	}
> +
> +	/* Make sure no request can slip through without getting completed by
> +	 * either this call here to intel_engine_init_global_seqno, or the one
> +	 * in nop_complete_submit_request.
> +	 */
> +	synchronize_rcu();
> +
> +	for_each_engine(engine, i915, id) {
> +		unsigned long flags;
> +
> +		/* Mark all pending requests as complete so that any concurrent
> +		 * (lockless) lookup doesn't try and wait upon the request as we
> +		 * reset it.
> +		 */
> +		spin_lock_irqsave(&engine->timeline->lock, flags);
> +		intel_engine_init_global_seqno(engine,
> +					       intel_engine_last_submit(engine));
> +		spin_unlock_irqrestore(&engine->timeline->lock, flags);
> +	}
> +
> +	set_bit(I915_WEDGED, &i915->gpu_error.flags);
> +	wake_up_all(&i915->gpu_error.reset_queue);
>  }
>
>  bool i915_gem_unset_wedged(struct drm_i915_private *i915)
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index b100b38f1dd2..ef78a85cb845 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -556,7 +556,9 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
>  	switch (state) {
>  	case FENCE_COMPLETE:
>  		trace_i915_gem_request_submit(request);
> +		rcu_read_lock();

This trick needs a comment.

/*
 * We need to serialize use of the submit_request() callback with its
 * hotplugging performed during an emergency i915_gem_set_wedged().
 * We use the RCU mechanism to mark the critical section in order to
 * force i915_gem_set_wedged() to wait until the submit_request() is
 * completed before proceeding.
 */

-Chris