Quoting Mika Kuoppala (2019-08-16 08:50:29) > Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes: > > static inline struct i915_request * > > execlists_schedule_in(struct i915_request *rq, int idx) > > { > > - struct intel_context *ce = rq->hw_context; > > - int count; > > + struct intel_context * const ce = rq->hw_context; > > + struct intel_engine_cs *old; > > > > + GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine)); > > trace_i915_request_in(rq, idx); > > > > - count = intel_context_inflight_count(ce); > > - if (!count) { > > - intel_context_get(ce); > > - ce->inflight = rq->engine; > > - > > - intel_gt_pm_get(ce->inflight->gt); > > - execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); > > - intel_engine_context_in(ce->inflight); > > - } > > + old = READ_ONCE(ce->inflight); > > + do { > > + if (!old) { > > The schedule out might have swapped inflight in here ruining our day. > So I am here trying to figure out how you can pull it off. Once we _know_ the context is idle, the only way it can become busy again is via submitting a request under the engine->active.lock, which we hold. Note that schedule-out also has exclusive access by its caller (only one submission tasklet at a time), but schedule-out may run concurrently to schedule-in. (But once we idle a context in schedule-out, we will never see it again until a schedule-in, hence we know that upon seeing NULL we have exclusive access.) > > + WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq)); > > + break; > > + } > > + } while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old))); > > > > - intel_context_inflight_inc(ce); > > GEM_BUG_ON(intel_context_inflight(ce) != rq->engine); > > - > > return i915_request_get(rq); > > } > > enable_timeslice(struct intel_engine_cs *engine) > > { > > - struct i915_request *last = last_active(&engine->execlists); > > + struct i915_request * const *port; > > + int hint; > > + > > + port = engine->execlists.active; > > + while (port[0] && i915_request_completed(port[0])) > > + port++; > > + if (!port[0]) > > + return false; > > > > - return last && need_timeslice(engine, last); > > + hint = engine->execlists.queue_priority_hint; > > + if (port[1]) > > + hint = max(rq_prio(port[1]), hint); > > + > > + /* Compare the two end-points as an unlocked approximation */ > > + return hint >= effective_prio(port[0]); > > What happens if we get it wrong? So the last element in the next context is always the lowest priority (normally they are all the same priority). If there is a variation in priority along the requests in the second context, that may mask that the first request there should trigger a timeslice. Storing an execlists.switch_priority_hint should remove the ambiguity. > > @@ -1494,9 +1527,12 @@ static void execlists_submission_tasklet(unsigned long data) > > struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; > > unsigned long flags; > > > > - spin_lock_irqsave(&engine->active.lock, flags); > > - __execlists_submission_tasklet(engine); > > - spin_unlock_irqrestore(&engine->active.lock, flags); > > + process_csb(engine); > > + if (!engine->execlists.pending[0]) { > > !READ_ONCE(...)? Would atleast pair documentatically. How about if (process_csb()) { > > + spin_lock_irqsave(&engine->active.lock, flags); > > + __execlists_submission_tasklet(engine); > > + spin_unlock_irqrestore(&engine->active.lock, flags); > > + } > > } > > > > static void execlists_submission_timer(struct timer_list *timer) > > diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h > > index d652ba5d2320..562f756da421 100644 > > --- a/drivers/gpu/drm/i915/i915_utils.h > > +++ b/drivers/gpu/drm/i915/i915_utils.h > > @@ -161,17 +161,15 @@ __check_struct_size(size_t base, size_t arr, size_t count, size_t *size) > > ((typeof(ptr))((unsigned long)(ptr) | __bits)); \ > > }) > > > > -#define ptr_count_dec(p_ptr) do { \ > > - typeof(p_ptr) __p = (p_ptr); \ > > - unsigned long __v = (unsigned long)(*__p); \ > > - *__p = (typeof(*p_ptr))(--__v); \ > > -} while (0) > > - > > -#define ptr_count_inc(p_ptr) do { \ > > - typeof(p_ptr) __p = (p_ptr); \ > > - unsigned long __v = (unsigned long)(*__p); \ > > - *__p = (typeof(*p_ptr))(++__v); \ > > -} while (0) > > +#define ptr_dec(ptr) ({ \ > > + unsigned long __v = (unsigned long)(ptr); \ > > + (typeof(ptr))(__v - 1); \ > > +}) > > + > > +#define ptr_inc(ptr) ({ \ > > + unsigned long __v = (unsigned long)(ptr); \ > > + (typeof(ptr))(__v + 1); \ > > +}) > > Should we add GEM_DEBUG_WARN_ON if we overflow or do we > detect through other means? What's an overflow? What's the alignment of the target pointer? Perhaps the user intended that every 8 uses starts at the next location. That's not known at this basic level. And these are decidedly use-at-you-own-risk :) -Chris _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx