Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes: > Migrate the request operations out of the main body of i915_gem.c and > into their own C file for easier expansion. > > v2: Move __i915_add_request() across as well > > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Acked-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxx> > --- > drivers/gpu/drm/i915/Makefile | 1 + > drivers/gpu/drm/i915/i915_drv.h | 209 +--------- > drivers/gpu/drm/i915/i915_gem.c | 655 +------------------------------ > drivers/gpu/drm/i915/i915_gem_request.c | 658 ++++++++++++++++++++++++++++++++ > drivers/gpu/drm/i915/i915_gem_request.h | 238 ++++++++++++ > 5 files changed, 905 insertions(+), 856 deletions(-) > create mode 100644 drivers/gpu/drm/i915/i915_gem_request.c > create mode 100644 drivers/gpu/drm/i915/i915_gem_request.h > > diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile > index 75318ebb8d25..6092f0ea24df 100644 > --- a/drivers/gpu/drm/i915/Makefile > +++ b/drivers/gpu/drm/i915/Makefile > @@ -33,6 +33,7 @@ i915-y += i915_cmd_parser.o \ > i915_gem_gtt.o \ > i915_gem.o \ > i915_gem_render_state.o \ > + i915_gem_request.o \ > i915_gem_shrinker.o \ > i915_gem_stolen.o \ > i915_gem_tiling.o \ > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index e73c0fc84c73..a4767c198413 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -61,6 +61,7 @@ > #include "i915_gem.h" > #include "i915_gem_gtt.h" > #include "i915_gem_render_state.h" > +#include "i915_gem_request.h" > > #include "intel_gvt.h" > > @@ -2365,171 +2366,6 @@ static inline struct scatterlist *__sg_next(struct scatterlist *sg) > (((__iter).curr += PAGE_SIZE) < (__iter).max) || \ > ((__iter) = __sgt_iter(__sg_next((__iter).sgp), false), 0)) > > -/** > - * Request queue structure. > - * > - * The request queue allows us to note sequence numbers that have been emitted > - * and may be associated with active buffers to be retired. > - * > - * By keeping this list, we can avoid having to do questionable sequence > - * number comparisons on buffer last_read|write_seqno. It also allows an > - * emission time to be associated with the request for tracking how far ahead > - * of the GPU the submission is. > - * > - * The requests are reference counted, so upon creation they should have an > - * initial reference taken using kref_init > - */ > -struct drm_i915_gem_request { > - struct kref ref; > - > - /** On Which ring this request was generated */ > - struct drm_i915_private *i915; > - struct intel_engine_cs *engine; > - struct intel_signal_node signaling; > - > - /** GEM sequence number associated with the previous request, > - * when the HWS breadcrumb is equal to this the GPU is processing > - * this request. > - */ > - u32 previous_seqno; > - > - /** GEM sequence number associated with this request, > - * when the HWS breadcrumb is equal or greater than this the GPU > - * has finished processing this request. > - */ > - u32 seqno; > - > - /** Position in the ringbuffer of the start of the request */ > - u32 head; > - > - /** > - * Position in the ringbuffer of the start of the postfix. > - * This is required to calculate the maximum available ringbuffer > - * space without overwriting the postfix. 
> - */ > - u32 postfix; > - > - /** Position in the ringbuffer of the end of the whole request */ > - u32 tail; > - > - /** Preallocate space in the ringbuffer for the emitting the request */ > - u32 reserved_space; > - > - /** > - * Context and ring buffer related to this request > - * Contexts are refcounted, so when this request is associated with a > - * context, we must increment the context's refcount, to guarantee that > - * it persists while any request is linked to it. Requests themselves > - * are also refcounted, so the request will only be freed when the last > - * reference to it is dismissed, and the code in > - * i915_gem_request_free() will then decrement the refcount on the > - * context. > - */ > - struct i915_gem_context *ctx; > - struct intel_ringbuffer *ringbuf; > - > - /** > - * Context related to the previous request. > - * As the contexts are accessed by the hardware until the switch is > - * completed to a new context, the hardware may still be writing > - * to the context object after the breadcrumb is visible. We must > - * not unpin/unbind/prune that object whilst still active and so > - * we keep the previous context pinned until the following (this) > - * request is retired. > - */ > - struct i915_gem_context *previous_context; > - > - /** Batch buffer related to this request if any (used for > - error state dump only) */ > - struct drm_i915_gem_object *batch_obj; > - > - /** Time at which this request was emitted, in jiffies. */ > - unsigned long emitted_jiffies; > - > - /** global list entry for this request */ > - struct list_head list; > - > - struct drm_i915_file_private *file_priv; > - /** file_priv list entry for this request */ > - struct list_head client_list; > - > - /** process identifier submitting this request */ > - struct pid *pid; > - > - /** > - * The ELSP only accepts two elements at a time, so we queue > - * context/tail pairs on a given queue (ring->execlist_queue) until the > - * hardware is available. The queue serves a double purpose: we also use > - * it to keep track of the up to 2 contexts currently in the hardware > - * (usually one in execution and the other queued up by the GPU): We > - * only remove elements from the head of the queue when the hardware > - * informs us that an element has been completed. > - * > - * All accesses to the queue are mediated by a spinlock > - * (ring->execlist_lock). > - */ > - > - /** Execlist link in the submission queue.*/ > - struct list_head execlist_link; > - > - /** Execlists no. of times this request has been sent to the ELSP */ > - int elsp_submitted; > - > - /** Execlists context hardware id. */ > - unsigned ctx_hw_id; > -}; > - > -struct drm_i915_gem_request * __must_check > -i915_gem_request_alloc(struct intel_engine_cs *engine, > - struct i915_gem_context *ctx); > -void i915_gem_request_free(struct kref *req_ref); > -int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, > - struct drm_file *file); > - > -static inline uint32_t > -i915_gem_request_get_seqno(struct drm_i915_gem_request *req) > -{ > - return req ? req->seqno : 0; > -} > - > -static inline struct intel_engine_cs * > -i915_gem_request_get_engine(struct drm_i915_gem_request *req) > -{ > - return req ? 
req->engine : NULL; > -} > - > -static inline struct drm_i915_gem_request * > -i915_gem_request_reference(struct drm_i915_gem_request *req) > -{ > - if (req) > - kref_get(&req->ref); > - return req; > -} > - > -static inline void > -i915_gem_request_unreference(struct drm_i915_gem_request *req) > -{ > - kref_put(&req->ref, i915_gem_request_free); > -} > - > -static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst, > - struct drm_i915_gem_request *src) > -{ > - if (src) > - i915_gem_request_reference(src); > - > - if (*pdst) > - i915_gem_request_unreference(*pdst); > - > - *pdst = src; > -} > - > -/* > - * XXX: i915_gem_request_completed should be here but currently needs the > - * definition of i915_seqno_passed() which is below. It will be moved in > - * a later patch when the call to i915_seqno_passed() is obsoleted... > - */ > - > /* > * A command that requires special handling by the command parser. > */ > @@ -3297,37 +3133,6 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old, > struct drm_i915_gem_object *new, > unsigned frontbuffer_bits); > > -/** > - * Returns true if seq1 is later than seq2. > - */ > -static inline bool > -i915_seqno_passed(uint32_t seq1, uint32_t seq2) > -{ > - return (int32_t)(seq1 - seq2) >= 0; > -} > - > -static inline bool i915_gem_request_started(const struct drm_i915_gem_request *req) > -{ > - return i915_seqno_passed(intel_engine_get_seqno(req->engine), > - req->previous_seqno); > -} > - > -static inline bool i915_gem_request_completed(const struct drm_i915_gem_request *req) > -{ > - return i915_seqno_passed(intel_engine_get_seqno(req->engine), > - req->seqno); > -} > - > -bool __i915_spin_request(const struct drm_i915_gem_request *request, > - int state, unsigned long timeout_us); > -static inline bool i915_spin_request(const struct drm_i915_gem_request *request, > - int state, unsigned long timeout_us) > -{ > - return (i915_gem_request_started(request) && > - __i915_spin_request(request, state, timeout_us)); > -} > - > -int __must_check i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno); > int __must_check i915_gem_set_seqno(struct drm_device *dev, u32 seqno); > > struct drm_i915_gem_request * > @@ -3385,18 +3190,6 @@ void i915_gem_cleanup_engines(struct drm_device *dev); > int __must_check i915_gem_wait_for_idle(struct drm_i915_private *dev_priv); > int __must_check i915_gem_suspend(struct drm_device *dev); > void i915_gem_resume(struct drm_device *dev); > -void __i915_add_request(struct drm_i915_gem_request *req, > - struct drm_i915_gem_object *batch_obj, > - bool flush_caches); > -#define i915_add_request(req) \ > - __i915_add_request(req, NULL, true) > -#define i915_add_request_no_flush(req) \ > - __i915_add_request(req, NULL, false) > -int __i915_wait_request(struct drm_i915_gem_request *req, > - bool interruptible, > - s64 *timeout, > - struct intel_rps_client *rps); > -int __must_check i915_wait_request(struct drm_i915_gem_request *req); > int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf); > int __must_check > i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 8b42a5101f11..10e5db3f294e 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -1325,365 +1325,6 @@ put_rpm: > return ret; > } > > -static int > -i915_gem_check_wedge(unsigned reset_counter, bool interruptible) > -{ > - if (__i915_terminally_wedged(reset_counter)) > - return -EIO; > - > - if 
(__i915_reset_in_progress(reset_counter)) { > - /* Non-interruptible callers can't handle -EAGAIN, hence return > - * -EIO unconditionally for these. */ > - if (!interruptible) > - return -EIO; > - > - return -EAGAIN; > - } > - > - return 0; > -} > - > -static unsigned long local_clock_us(unsigned *cpu) > -{ > - unsigned long t; > - > - /* Cheaply and approximately convert from nanoseconds to microseconds. > - * The result and subsequent calculations are also defined in the same > - * approximate microseconds units. The principal source of timing > - * error here is from the simple truncation. > - * > - * Note that local_clock() is only defined wrt to the current CPU; > - * the comparisons are no longer valid if we switch CPUs. Instead of > - * blocking preemption for the entire busywait, we can detect the CPU > - * switch and use that as indicator of system load and a reason to > - * stop busywaiting, see busywait_stop(). > - */ > - *cpu = get_cpu(); > - t = local_clock() >> 10; > - put_cpu(); > - > - return t; > -} > - > -static bool busywait_stop(unsigned long timeout, unsigned cpu) > -{ > - unsigned this_cpu; > - > - if (time_after(local_clock_us(&this_cpu), timeout)) > - return true; > - > - return this_cpu != cpu; > -} > - > -bool __i915_spin_request(const struct drm_i915_gem_request *req, > - int state, unsigned long timeout_us) > -{ > - unsigned cpu; > - > - /* When waiting for high frequency requests, e.g. during synchronous > - * rendering split between the CPU and GPU, the finite amount of time > - * required to set up the irq and wait upon it limits the response > - * rate. By busywaiting on the request completion for a short while we > - * can service the high frequency waits as quick as possible. However, > - * if it is a slow request, we want to sleep as quickly as possible. > - * The tradeoff between waiting and sleeping is roughly the time it > - * takes to sleep on a request, on the order of a microsecond. > - */ > - > - timeout_us += local_clock_us(&cpu); > - do { > - if (i915_gem_request_completed(req)) > - return true; > - > - if (signal_pending_state(state, current)) > - break; > - > - if (busywait_stop(timeout_us, cpu)) > - break; > - > - cpu_relax_lowlatency(); > - } while (!need_resched()); > - > - return false; > -} > - > -/** > - * __i915_wait_request - wait until execution of request has finished > - * @req: duh! > - * @interruptible: do an interruptible wait (normally yes) > - * @timeout: in - how long to wait (NULL forever); out - how much time remaining > - * @rps: RPS client > - * > - * Note: It is of utmost importance that the passed in seqno and reset_counter > - * values have been read by the caller in an smp safe manner. Where read-side > - * locks are involved, it is sufficient to read the reset_counter before > - * unlocking the lock that protects the seqno. For lockless tricks, the > - * reset_counter _must_ be read before, and an appropriate smp_rmb must be > - * inserted. > - * > - * Returns 0 if the request was found within the alloted time. Else returns the > - * errno with remaining time filled in timeout argument. > - */ > -int __i915_wait_request(struct drm_i915_gem_request *req, > - bool interruptible, > - s64 *timeout, > - struct intel_rps_client *rps) > -{ > - int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; > - DEFINE_WAIT(reset); > - struct intel_wait wait; > - unsigned long timeout_remain; > - s64 before = 0; /* Only to silence a compiler warning. 
*/ > - int ret = 0; > - > - might_sleep(); > - > - if (list_empty(&req->list)) > - return 0; > - > - if (i915_gem_request_completed(req)) > - return 0; > - > - timeout_remain = MAX_SCHEDULE_TIMEOUT; > - if (timeout) { > - if (WARN_ON(*timeout < 0)) > - return -EINVAL; > - > - if (*timeout == 0) > - return -ETIME; > - > - timeout_remain = nsecs_to_jiffies_timeout(*timeout); > - > - /* > - * Record current time in case interrupted by signal, or wedged. > - */ > - before = ktime_get_raw_ns(); > - } > - > - trace_i915_gem_request_wait_begin(req); > - > - /* This client is about to stall waiting for the GPU. In many cases > - * this is undesirable and limits the throughput of the system, as > - * many clients cannot continue processing user input/output whilst > - * blocked. RPS autotuning may take tens of milliseconds to respond > - * to the GPU load and thus incurs additional latency for the client. > - * We can circumvent that by promoting the GPU frequency to maximum > - * before we wait. This makes the GPU throttle up much more quickly > - * (good for benchmarks and user experience, e.g. window animations), > - * but at a cost of spending more power processing the workload > - * (bad for battery). Not all clients even want their results > - * immediately and for them we should just let the GPU select its own > - * frequency to maximise efficiency. To prevent a single client from > - * forcing the clocks too high for the whole system, we only allow > - * each client to waitboost once in a busy period. > - */ > - if (INTEL_INFO(req->i915)->gen >= 6) > - gen6_rps_boost(req->i915, rps, req->emitted_jiffies); > - > - /* Optimistic spin for the next ~jiffie before touching IRQs */ > - if (i915_spin_request(req, state, 5)) > - goto complete; > - > - set_current_state(state); > - add_wait_queue(&req->i915->gpu_error.wait_queue, &reset); > - > - intel_wait_init(&wait, req->seqno); > - if (intel_engine_add_wait(req->engine, &wait)) > - /* In order to check that we haven't missed the interrupt > - * as we enabled it, we need to kick ourselves to do a > - * coherent check on the seqno before we sleep. > - */ > - goto wakeup; > - > - for (;;) { > - if (signal_pending_state(state, current)) { > - ret = -ERESTARTSYS; > - break; > - } > - > - timeout_remain = io_schedule_timeout(timeout_remain); > - if (timeout_remain == 0) { > - ret = -ETIME; > - break; > - } > - > - if (intel_wait_complete(&wait)) > - break; > - > - set_current_state(state); > - > -wakeup: > - /* Carefully check if the request is complete, giving time > - * for the seqno to be visible following the interrupt. > - * We also have to check in case we are kicked by the GPU > - * reset in order to drop the struct_mutex. > - */ > - if (__i915_request_irq_complete(req)) > - break; > - > - /* Only spin if we know the GPU is processing this request */ > - if (i915_spin_request(req, state, 2)) > - break; > - } > - remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset); > - > - intel_engine_remove_wait(req->engine, &wait); > - __set_current_state(TASK_RUNNING); > -complete: > - trace_i915_gem_request_wait_end(req); > - > - if (timeout) { > - s64 tres = *timeout - (ktime_get_raw_ns() - before); > - > - *timeout = tres < 0 ? 0 : tres; > - > - /* > - * Apparently ktime isn't accurate enough and occasionally has a > - * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch > - * things up to make the test happy. We allow up to 1 jiffy. > - * > - * This is a regrssion from the timespec->ktime conversion. 
> - */ > - if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000) > - *timeout = 0; > - } > - > - if (rps && req->seqno == req->engine->last_submitted_seqno) { > - /* The GPU is now idle and this client has stalled. > - * Since no other client has submitted a request in the > - * meantime, assume that this client is the only one > - * supplying work to the GPU but is unable to keep that > - * work supplied because it is waiting. Since the GPU is > - * then never kept fully busy, RPS autoclocking will > - * keep the clocks relatively low, causing further delays. > - * Compensate by giving the synchronous client credit for > - * a waitboost next time. > - */ > - spin_lock(&req->i915->rps.client_lock); > - list_del_init(&rps->link); > - spin_unlock(&req->i915->rps.client_lock); > - } > - > - return ret; > -} > - > -int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, > - struct drm_file *file) > -{ > - struct drm_i915_file_private *file_priv; > - > - WARN_ON(!req || !file || req->file_priv); > - > - if (!req || !file) > - return -EINVAL; > - > - if (req->file_priv) > - return -EINVAL; > - > - file_priv = file->driver_priv; > - > - spin_lock(&file_priv->mm.lock); > - req->file_priv = file_priv; > - list_add_tail(&req->client_list, &file_priv->mm.request_list); > - spin_unlock(&file_priv->mm.lock); > - > - req->pid = get_pid(task_pid(current)); > - > - return 0; > -} > - > -static inline void > -i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) > -{ > - struct drm_i915_file_private *file_priv = request->file_priv; > - > - if (!file_priv) > - return; > - > - spin_lock(&file_priv->mm.lock); > - list_del(&request->client_list); > - request->file_priv = NULL; > - spin_unlock(&file_priv->mm.lock); > - > - put_pid(request->pid); > - request->pid = NULL; > -} > - > -static void i915_gem_request_retire(struct drm_i915_gem_request *request) > -{ > - trace_i915_gem_request_retire(request); > - > - /* We know the GPU must have read the request to have > - * sent us the seqno + interrupt, so use the position > - * of tail of the request to update the last known position > - * of the GPU head. > - * > - * Note this requires that we are always called in request > - * completion order. > - */ > - request->ringbuf->last_retired_head = request->postfix; > - > - list_del_init(&request->list); > - i915_gem_request_remove_from_client(request); > - > - if (request->previous_context) { > - if (i915.enable_execlists) > - intel_lr_context_unpin(request->previous_context, > - request->engine); > - } > - > - i915_gem_context_unreference(request->ctx); > - i915_gem_request_unreference(request); > -} > - > -static void > -__i915_gem_request_retire__upto(struct drm_i915_gem_request *req) > -{ > - struct intel_engine_cs *engine = req->engine; > - struct drm_i915_gem_request *tmp; > - > - lockdep_assert_held(&engine->i915->drm.struct_mutex); > - > - if (list_empty(&req->list)) > - return; > - > - do { > - tmp = list_first_entry(&engine->request_list, > - typeof(*tmp), list); > - > - i915_gem_request_retire(tmp); > - } while (tmp != req); > - > - WARN_ON(i915_verify_lists(engine->dev)); > -} > - > -/** > - * Waits for a request to be signaled, and cleans up the > - * request and object lists appropriately for that event. 
> - * @req: request to wait on > - */ > -int > -i915_wait_request(struct drm_i915_gem_request *req) > -{ > - struct drm_i915_private *dev_priv = req->i915; > - bool interruptible; > - int ret; > - > - interruptible = dev_priv->mm.interruptible; > - > - BUG_ON(!mutex_is_locked(&dev_priv->drm.struct_mutex)); > - > - ret = __i915_wait_request(req, interruptible, NULL, NULL); > - if (ret) > - return ret; > - > - /* If the GPU hung, we want to keep the requests to find the guilty. */ > - if (!i915_reset_in_progress(&dev_priv->gpu_error)) > - __i915_gem_request_retire__upto(req); > - > - return 0; > -} > - > /** > * Ensures that all rendering to the object has completed and the object is > * safe to unbind from the GTT or access from the CPU. > @@ -1740,7 +1381,7 @@ i915_gem_object_retire_request(struct drm_i915_gem_object *obj, > i915_gem_object_retire__write(obj); > > if (!i915_reset_in_progress(&req->i915->gpu_error)) > - __i915_gem_request_retire__upto(req); > + i915_gem_request_retire_upto(req); > } > > /* A nonblocking variant of the above wait. This is a highly dangerous routine > @@ -2761,193 +2402,6 @@ i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring) > drm_gem_object_unreference(&obj->base); > } > > -static int > -i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno) > -{ > - struct intel_engine_cs *engine; > - int ret; > - > - /* Carefully retire all requests without writing to the rings */ > - for_each_engine(engine, dev_priv) { > - ret = intel_engine_idle(engine); > - if (ret) > - return ret; > - } > - i915_gem_retire_requests(dev_priv); > - > - /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ > - if (!i915_seqno_passed(seqno, dev_priv->next_seqno)) { > - while (intel_kick_waiters(dev_priv) || > - intel_kick_signalers(dev_priv)) > - yield(); > - } > - > - /* Finally reset hw state */ > - for_each_engine(engine, dev_priv) > - intel_ring_init_seqno(engine, seqno); > - > - return 0; > -} > - > -int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) > -{ > - struct drm_i915_private *dev_priv = to_i915(dev); > - int ret; > - > - if (seqno == 0) > - return -EINVAL; > - > - /* HWS page needs to be set less than what we > - * will inject to ring > - */ > - ret = i915_gem_init_seqno(dev_priv, seqno - 1); > - if (ret) > - return ret; > - > - /* Carefully set the last_seqno value so that wrap > - * detection still works > - */ > - dev_priv->next_seqno = seqno; > - dev_priv->last_seqno = seqno - 1; > - if (dev_priv->last_seqno == 0) > - dev_priv->last_seqno--; > - > - return 0; > -} > - > -int > -i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno) > -{ > - /* reserve 0 for non-seqno */ > - if (dev_priv->next_seqno == 0) { > - int ret = i915_gem_init_seqno(dev_priv, 0); > - if (ret) > - return ret; > - > - dev_priv->next_seqno = 1; > - } > - > - *seqno = dev_priv->last_seqno = dev_priv->next_seqno++; > - return 0; > -} > - > -static void i915_gem_mark_busy(const struct intel_engine_cs *engine) > -{ > - struct drm_i915_private *dev_priv = engine->i915; > - > - dev_priv->gt.active_engines |= intel_engine_flag(engine); > - if (dev_priv->gt.awake) > - return; > - > - intel_runtime_pm_get_noresume(dev_priv); > - dev_priv->gt.awake = true; > - > - intel_enable_gt_powersave(dev_priv); > - i915_update_gfx_val(dev_priv); > - if (INTEL_GEN(dev_priv) >= 6) > - gen6_rps_busy(dev_priv); > - > - queue_delayed_work(dev_priv->wq, > - &dev_priv->gt.retire_work, > - round_jiffies_up_relative(HZ)); > -} > - > -/* > - * NB: This function is 
not allowed to fail. Doing so would mean the the > - * request is not being tracked for completion but the work itself is > - * going to happen on the hardware. This would be a Bad Thing(tm). > - */ > -void __i915_add_request(struct drm_i915_gem_request *request, > - struct drm_i915_gem_object *obj, > - bool flush_caches) > -{ > - struct intel_engine_cs *engine; > - struct intel_ringbuffer *ringbuf; > - u32 request_start; > - u32 reserved_tail; > - int ret; > - > - if (WARN_ON(request == NULL)) > - return; > - > - engine = request->engine; > - ringbuf = request->ringbuf; > - > - /* > - * To ensure that this call will not fail, space for its emissions > - * should already have been reserved in the ring buffer. Let the ring > - * know that it is time to use that space up. > - */ > - request_start = intel_ring_get_tail(ringbuf); > - reserved_tail = request->reserved_space; > - request->reserved_space = 0; > - > - /* > - * Emit any outstanding flushes - execbuf can fail to emit the flush > - * after having emitted the batchbuffer command. Hence we need to fix > - * things up similar to emitting the lazy request. The difference here > - * is that the flush _must_ happen before the next request, no matter > - * what. > - */ > - if (flush_caches) { > - if (i915.enable_execlists) > - ret = logical_ring_flush_all_caches(request); > - else > - ret = intel_ring_flush_all_caches(request); > - /* Not allowed to fail! */ > - WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret); > - } > - > - trace_i915_gem_request_add(request); > - > - request->head = request_start; > - > - /* Whilst this request exists, batch_obj will be on the > - * active_list, and so will hold the active reference. Only when this > - * request is retired will the the batch_obj be moved onto the > - * inactive_list and lose its active reference. Hence we do not need > - * to explicitly hold another reference here. > - */ > - request->batch_obj = obj; > - > - /* Seal the request and mark it as pending execution. Note that > - * we may inspect this state, without holding any locks, during > - * hangcheck. Hence we apply the barrier to ensure that we do not > - * see a more recent value in the hws than we are tracking. > - */ > - request->emitted_jiffies = jiffies; > - request->previous_seqno = engine->last_submitted_seqno; > - smp_store_mb(engine->last_submitted_seqno, request->seqno); > - list_add_tail(&request->list, &engine->request_list); > - > - /* Record the position of the start of the request so that > - * should we detect the updated seqno part-way through the > - * GPU processing the request, we never over-estimate the > - * position of the head. > - */ > - request->postfix = intel_ring_get_tail(ringbuf); > - > - if (i915.enable_execlists) > - ret = engine->emit_request(request); > - else { > - ret = engine->add_request(request); > - > - request->tail = intel_ring_get_tail(ringbuf); > - } > - /* Not allowed to fail! */ > - WARN(ret, "emit|add_request failed: %d!\n", ret); > - /* Sanity check that the reserved size was large enough. 
*/ > - ret = intel_ring_get_tail(ringbuf) - request_start; > - if (ret < 0) > - ret += ringbuf->size; > - WARN_ONCE(ret > reserved_tail, > - "Not enough space reserved (%d bytes) " > - "for adding the request (%d bytes)\n", > - reserved_tail, ret); > - > - i915_gem_mark_busy(engine); > -} > - > static bool i915_context_is_banned(const struct i915_gem_context *ctx) > { > unsigned long elapsed; > @@ -2979,101 +2433,6 @@ static void i915_set_reset_status(struct i915_gem_context *ctx, > } > } > > -void i915_gem_request_free(struct kref *req_ref) > -{ > - struct drm_i915_gem_request *req = container_of(req_ref, > - typeof(*req), ref); > - kmem_cache_free(req->i915->requests, req); > -} > - > -static inline int > -__i915_gem_request_alloc(struct intel_engine_cs *engine, > - struct i915_gem_context *ctx, > - struct drm_i915_gem_request **req_out) > -{ > - struct drm_i915_private *dev_priv = engine->i915; > - unsigned reset_counter = i915_reset_counter(&dev_priv->gpu_error); > - struct drm_i915_gem_request *req; > - int ret; > - > - if (!req_out) > - return -EINVAL; > - > - *req_out = NULL; > - > - /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report > - * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex > - * and restart. > - */ > - ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible); > - if (ret) > - return ret; > - > - req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL); > - if (req == NULL) > - return -ENOMEM; > - > - ret = i915_gem_get_seqno(engine->i915, &req->seqno); > - if (ret) > - goto err; > - > - kref_init(&req->ref); > - req->i915 = dev_priv; > - req->engine = engine; > - req->ctx = ctx; > - i915_gem_context_reference(req->ctx); > - > - /* > - * Reserve space in the ring buffer for all the commands required to > - * eventually emit this request. This is to guarantee that the > - * i915_add_request() call can't fail. Note that the reserve may need > - * to be redone if the request is not actually submitted straight > - * away, e.g. because a GPU scheduler has deferred it. > - */ > - req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST; > - > - if (i915.enable_execlists) > - ret = intel_logical_ring_alloc_request_extras(req); > - else > - ret = intel_ring_alloc_request_extras(req); > - if (ret) > - goto err_ctx; > - > - *req_out = req; > - return 0; > - > -err_ctx: > - i915_gem_context_unreference(ctx); > -err: > - kmem_cache_free(dev_priv->requests, req); > - return ret; > -} > - > -/** > - * i915_gem_request_alloc - allocate a request structure > - * > - * @engine: engine that we wish to issue the request on. > - * @ctx: context that the request will be associated with. > - * This can be NULL if the request is not directly related to > - * any specific user context, in which case this function will > - * choose an appropriate context to use. > - * > - * Returns a pointer to the allocated request if successful, > - * or an error code if not. > - */ > -struct drm_i915_gem_request * > -i915_gem_request_alloc(struct intel_engine_cs *engine, > - struct i915_gem_context *ctx) > -{ > - struct drm_i915_gem_request *req; > - int err; > - > - if (ctx == NULL) > - ctx = engine->i915->kernel_context; > - err = __i915_gem_request_alloc(engine, ctx, &req); > - return err ? ERR_PTR(err) : req; > -} > - > struct drm_i915_gem_request * > i915_gem_find_active_request(struct intel_engine_cs *engine) > { > @@ -3147,14 +2506,14 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine) > * implicit references on things like e.g. 
ppgtt address spaces through > * the request. > */ > - while (!list_empty(&engine->request_list)) { > + if (!list_empty(&engine->request_list)) { > struct drm_i915_gem_request *request; > > - request = list_first_entry(&engine->request_list, > - struct drm_i915_gem_request, > - list); > + request = list_last_entry(&engine->request_list, > + struct drm_i915_gem_request, > + list); > > - i915_gem_request_retire(request); > + i915_gem_request_retire_upto(request); > } > > /* Having flushed all requests from all queues, we know that all > @@ -3222,7 +2581,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *engine) > if (!i915_gem_request_completed(request)) > break; > > - i915_gem_request_retire(request); > + i915_gem_request_retire_upto(request); > } > > /* Move any buffers on the active list that are no longer referenced > diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c > new file mode 100644 > index 000000000000..9e9aa6b725f7 > --- /dev/null > +++ b/drivers/gpu/drm/i915/i915_gem_request.c > @@ -0,0 +1,658 @@ > +/* > + * Copyright © 2008-2015 Intel Corporation > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice (including the next > + * paragraph) shall be included in all copies or substantial portions of the > + * Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS > + * IN THE SOFTWARE. 
> + * > + */ > + > +#include "i915_drv.h" > + > +int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, > + struct drm_file *file) > +{ > + struct drm_i915_private *dev_private; > + struct drm_i915_file_private *file_priv; > + > + WARN_ON(!req || !file || req->file_priv); > + > + if (!req || !file) > + return -EINVAL; > + > + if (req->file_priv) > + return -EINVAL; > + > + dev_private = req->i915; > + file_priv = file->driver_priv; > + > + spin_lock(&file_priv->mm.lock); > + req->file_priv = file_priv; > + list_add_tail(&req->client_list, &file_priv->mm.request_list); > + spin_unlock(&file_priv->mm.lock); > + > + req->pid = get_pid(task_pid(current)); > + > + return 0; > +} > + > +static inline void > +i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) > +{ > + struct drm_i915_file_private *file_priv = request->file_priv; > + > + if (!file_priv) > + return; > + > + spin_lock(&file_priv->mm.lock); > + list_del(&request->client_list); > + request->file_priv = NULL; > + spin_unlock(&file_priv->mm.lock); > + > + put_pid(request->pid); > + request->pid = NULL; > +} > + > +static void i915_gem_request_retire(struct drm_i915_gem_request *request) > +{ > + trace_i915_gem_request_retire(request); > + list_del_init(&request->list); > + > + /* We know the GPU must have read the request to have > + * sent us the seqno + interrupt, so use the position > + * of tail of the request to update the last known position > + * of the GPU head. > + * > + * Note this requires that we are always called in request > + * completion order. > + */ > + request->ringbuf->last_retired_head = request->postfix; > + > + i915_gem_request_remove_from_client(request); > + > + if (request->previous_context) { > + if (i915.enable_execlists) > + intel_lr_context_unpin(request->previous_context, > + request->engine); > + } > + > + i915_gem_context_unreference(request->ctx); > + i915_gem_request_unreference(request); > +} > + > +void i915_gem_request_retire_upto(struct drm_i915_gem_request *req) > +{ > + struct intel_engine_cs *engine = req->engine; > + struct drm_i915_gem_request *tmp; > + > + lockdep_assert_held(&req->i915->drm.struct_mutex); > + > + if (list_empty(&req->list)) > + return; > + > + do { > + tmp = list_first_entry(&engine->request_list, > + typeof(*tmp), list); > + > + i915_gem_request_retire(tmp); > + } while (tmp != req); > + > + WARN_ON(i915_verify_lists(engine->dev)); > +} > + > +static int i915_gem_check_wedge(unsigned int reset_counter, bool interruptible) > +{ > + if (__i915_terminally_wedged(reset_counter)) > + return -EIO; > + > + if (__i915_reset_in_progress(reset_counter)) { > + /* Non-interruptible callers can't handle -EAGAIN, hence return > + * -EIO unconditionally for these. 
> + */ > + if (!interruptible) > + return -EIO; > + > + return -EAGAIN; > + } > + > + return 0; > +} > + > +static int i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno) > +{ > + struct intel_engine_cs *engine; > + int ret; > + > + /* Carefully retire all requests without writing to the rings */ > + for_each_engine(engine, dev_priv) { > + ret = intel_engine_idle(engine); > + if (ret) > + return ret; > + } > + i915_gem_retire_requests(dev_priv); > + > + /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ > + if (!i915_seqno_passed(seqno, dev_priv->next_seqno)) { > + while (intel_kick_waiters(dev_priv) || > + intel_kick_signalers(dev_priv)) > + yield(); > + } > + > + /* Finally reset hw state */ > + for_each_engine(engine, dev_priv) > + intel_ring_init_seqno(engine, seqno); > + > + return 0; > +} > + > +int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) > +{ > + struct drm_i915_private *dev_priv = to_i915(dev); > + int ret; > + > + if (seqno == 0) > + return -EINVAL; > + > + /* HWS page needs to be set less than what we > + * will inject to ring > + */ > + ret = i915_gem_init_seqno(dev_priv, seqno - 1); > + if (ret) > + return ret; > + > + /* Carefully set the last_seqno value so that wrap > + * detection still works > + */ > + dev_priv->next_seqno = seqno; > + dev_priv->last_seqno = seqno - 1; > + if (dev_priv->last_seqno == 0) > + dev_priv->last_seqno--; > + > + return 0; > +} > + > +static int i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno) > +{ > + /* reserve 0 for non-seqno */ > + if (unlikely(dev_priv->next_seqno == 0)) { > + int ret; > + > + ret = i915_gem_init_seqno(dev_priv, 0); > + if (ret) > + return ret; > + > + dev_priv->next_seqno = 1; > + } > + > + *seqno = dev_priv->last_seqno = dev_priv->next_seqno++; > + return 0; > +} > + > +static inline int > +__i915_gem_request_alloc(struct intel_engine_cs *engine, > + struct i915_gem_context *ctx, > + struct drm_i915_gem_request **req_out) > +{ > + struct drm_i915_private *dev_priv = engine->i915; > + unsigned int reset_counter = i915_reset_counter(&dev_priv->gpu_error); > + struct drm_i915_gem_request *req; > + int ret; > + > + if (!req_out) > + return -EINVAL; > + > + *req_out = NULL; > + > + /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report > + * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex > + * and restart. > + */ > + ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible); > + if (ret) > + return ret; > + > + req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL); > + if (!req) > + return -ENOMEM; > + > + ret = i915_gem_get_seqno(dev_priv, &req->seqno); > + if (ret) > + goto err; > + > + kref_init(&req->ref); > + req->i915 = dev_priv; > + req->engine = engine; > + req->ctx = ctx; > + i915_gem_context_reference(ctx); > + > + /* > + * Reserve space in the ring buffer for all the commands required to > + * eventually emit this request. This is to guarantee that the > + * i915_add_request() call can't fail. Note that the reserve may need > + * to be redone if the request is not actually submitted straight > + * away, e.g. because a GPU scheduler has deferred it. 
> + */ > + req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST; > + > + if (i915.enable_execlists) > + ret = intel_logical_ring_alloc_request_extras(req); > + else > + ret = intel_ring_alloc_request_extras(req); > + if (ret) > + goto err_ctx; > + > + *req_out = req; > + return 0; > + > +err_ctx: > + i915_gem_context_unreference(ctx); > +err: > + kmem_cache_free(dev_priv->requests, req); > + return ret; > +} > + > +/** > + * i915_gem_request_alloc - allocate a request structure > + * > + * @engine: engine that we wish to issue the request on. > + * @ctx: context that the request will be associated with. > + * This can be NULL if the request is not directly related to > + * any specific user context, in which case this function will > + * choose an appropriate context to use. > + * > + * Returns a pointer to the allocated request if successful, > + * or an error code if not. > + */ > +struct drm_i915_gem_request * > +i915_gem_request_alloc(struct intel_engine_cs *engine, > + struct i915_gem_context *ctx) > +{ > + struct drm_i915_gem_request *req; > + int err; > + > + if (!ctx) > + ctx = engine->i915->kernel_context; > + err = __i915_gem_request_alloc(engine, ctx, &req); > + return err ? ERR_PTR(err) : req; > +} > + > +static void i915_gem_mark_busy(const struct intel_engine_cs *engine) > +{ > + struct drm_i915_private *dev_priv = engine->i915; > + > + dev_priv->gt.active_engines |= intel_engine_flag(engine); > + if (dev_priv->gt.awake) > + return; > + > + intel_runtime_pm_get_noresume(dev_priv); > + dev_priv->gt.awake = true; > + > + intel_enable_gt_powersave(dev_priv); > + i915_update_gfx_val(dev_priv); > + if (INTEL_GEN(dev_priv) >= 6) > + gen6_rps_busy(dev_priv); > + > + queue_delayed_work(dev_priv->wq, > + &dev_priv->gt.retire_work, > + round_jiffies_up_relative(HZ)); > +} > + > +/* > + * NB: This function is not allowed to fail. Doing so would mean the the > + * request is not being tracked for completion but the work itself is > + * going to happen on the hardware. This would be a Bad Thing(tm). > + */ > +void __i915_add_request(struct drm_i915_gem_request *request, > + struct drm_i915_gem_object *obj, > + bool flush_caches) > +{ > + struct intel_engine_cs *engine; > + struct intel_ringbuffer *ringbuf; > + u32 request_start; > + u32 reserved_tail; > + int ret; > + > + if (WARN_ON(!request)) > + return; > + > + engine = request->engine; > + ringbuf = request->ringbuf; > + > + /* > + * To ensure that this call will not fail, space for its emissions > + * should already have been reserved in the ring buffer. Let the ring > + * know that it is time to use that space up. > + */ > + request_start = intel_ring_get_tail(ringbuf); > + reserved_tail = request->reserved_space; > + request->reserved_space = 0; > + > + /* > + * Emit any outstanding flushes - execbuf can fail to emit the flush > + * after having emitted the batchbuffer command. Hence we need to fix > + * things up similar to emitting the lazy request. The difference here > + * is that the flush _must_ happen before the next request, no matter > + * what. > + */ > + if (flush_caches) { > + if (i915.enable_execlists) > + ret = logical_ring_flush_all_caches(request); > + else > + ret = intel_ring_flush_all_caches(request); > + /* Not allowed to fail! */ > + WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret); > + } > + > + trace_i915_gem_request_add(request); > + > + request->head = request_start; > + > + /* Whilst this request exists, batch_obj will be on the > + * active_list, and so will hold the active reference. 
Only when this > + * request is retired will the the batch_obj be moved onto the > + * inactive_list and lose its active reference. Hence we do not need > + * to explicitly hold another reference here. > + */ > + request->batch_obj = obj; > + > + /* Seal the request and mark it as pending execution. Note that > + * we may inspect this state, without holding any locks, during > + * hangcheck. Hence we apply the barrier to ensure that we do not > + * see a more recent value in the hws than we are tracking. > + */ > + request->emitted_jiffies = jiffies; > + request->previous_seqno = engine->last_submitted_seqno; > + smp_store_mb(engine->last_submitted_seqno, request->seqno); > + list_add_tail(&request->list, &engine->request_list); > + > + /* Record the position of the start of the request so that > + * should we detect the updated seqno part-way through the > + * GPU processing the request, we never over-estimate the > + * position of the head. > + */ > + request->postfix = intel_ring_get_tail(ringbuf); > + > + if (i915.enable_execlists) { > + ret = engine->emit_request(request); > + } else { > + ret = engine->add_request(request); > + > + request->tail = intel_ring_get_tail(ringbuf); > + } > + /* Not allowed to fail! */ > + WARN(ret, "emit|add_request failed: %d!\n", ret); > + /* Sanity check that the reserved size was large enough. */ > + ret = intel_ring_get_tail(ringbuf) - request_start; > + if (ret < 0) > + ret += ringbuf->size; > + WARN_ONCE(ret > reserved_tail, > + "Not enough space reserved (%d bytes) " > + "for adding the request (%d bytes)\n", > + reserved_tail, ret); > + > + i915_gem_mark_busy(engine); > +} > + > +static unsigned long local_clock_us(unsigned int *cpu) > +{ > + unsigned long t; > + > + /* Cheaply and approximately convert from nanoseconds to microseconds. > + * The result and subsequent calculations are also defined in the same > + * approximate microseconds units. The principal source of timing > + * error here is from the simple truncation. > + * > + * Note that local_clock() is only defined wrt to the current CPU; > + * the comparisons are no longer valid if we switch CPUs. Instead of > + * blocking preemption for the entire busywait, we can detect the CPU > + * switch and use that as indicator of system load and a reason to > + * stop busywaiting, see busywait_stop(). > + */ > + *cpu = get_cpu(); > + t = local_clock() >> 10; > + put_cpu(); > + > + return t; > +} > + > +static bool busywait_stop(unsigned long timeout, unsigned int cpu) > +{ > + unsigned int this_cpu; > + > + if (time_after(local_clock_us(&this_cpu), timeout)) > + return true; > + > + return this_cpu != cpu; > +} > + > +bool __i915_spin_request(const struct drm_i915_gem_request *req, > + int state, unsigned long timeout_us) > +{ > + unsigned int cpu; > + > + /* When waiting for high frequency requests, e.g. during synchronous > + * rendering split between the CPU and GPU, the finite amount of time > + * required to set up the irq and wait upon it limits the response > + * rate. By busywaiting on the request completion for a short while we > + * can service the high frequency waits as quick as possible. However, > + * if it is a slow request, we want to sleep as quickly as possible. > + * The tradeoff between waiting and sleeping is roughly the time it > + * takes to sleep on a request, on the order of a microsecond. 
> + */ > + > + timeout_us += local_clock_us(&cpu); > + do { > + if (i915_gem_request_completed(req)) > + return true; > + > + if (signal_pending_state(state, current)) > + break; > + > + if (busywait_stop(timeout_us, cpu)) > + break; > + > + cpu_relax_lowlatency(); > + } while (!need_resched()); > + > + return false; > +} > + > +/** > + * __i915_wait_request - wait until execution of request has finished > + * @req: duh! > + * @interruptible: do an interruptible wait (normally yes) > + * @timeout: in - how long to wait (NULL forever); out - how much time remaining > + * @rps: client to charge for RPS boosting > + * > + * Note: It is of utmost importance that the passed in seqno and reset_counter > + * values have been read by the caller in an smp safe manner. Where read-side > + * locks are involved, it is sufficient to read the reset_counter before > + * unlocking the lock that protects the seqno. For lockless tricks, the > + * reset_counter _must_ be read before, and an appropriate smp_rmb must be > + * inserted. > + * > + * Returns 0 if the request was found within the alloted time. Else returns the > + * errno with remaining time filled in timeout argument. > + */ > +int __i915_wait_request(struct drm_i915_gem_request *req, > + bool interruptible, > + s64 *timeout, > + struct intel_rps_client *rps) > +{ > + int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; > + DEFINE_WAIT(reset); > + struct intel_wait wait; > + unsigned long timeout_remain; > + int ret = 0; > + > + might_sleep(); > + > + if (list_empty(&req->list)) > + return 0; > + > + if (i915_gem_request_completed(req)) > + return 0; > + > + timeout_remain = MAX_SCHEDULE_TIMEOUT; > + if (timeout) { > + if (WARN_ON(*timeout < 0)) > + return -EINVAL; > + > + if (*timeout == 0) > + return -ETIME; > + > + /* Record current time in case interrupted, or wedged */ > + timeout_remain = nsecs_to_jiffies_timeout(*timeout); > + *timeout += ktime_get_raw_ns(); > + } > + > + trace_i915_gem_request_wait_begin(req); > + > + /* This client is about to stall waiting for the GPU. In many cases > + * this is undesirable and limits the throughput of the system, as > + * many clients cannot continue processing user input/output whilst > + * blocked. RPS autotuning may take tens of milliseconds to respond > + * to the GPU load and thus incurs additional latency for the client. > + * We can circumvent that by promoting the GPU frequency to maximum > + * before we wait. This makes the GPU throttle up much more quickly > + * (good for benchmarks and user experience, e.g. window animations), > + * but at a cost of spending more power processing the workload > + * (bad for battery). Not all clients even want their results > + * immediately and for them we should just let the GPU select its own > + * frequency to maximise efficiency. To prevent a single client from > + * forcing the clocks too high for the whole system, we only allow > + * each client to waitboost once in a busy period. 
> + */ > + if (INTEL_GEN(req->i915) >= 6) > + gen6_rps_boost(req->i915, rps, req->emitted_jiffies); > + > + /* Optimistic spin for the next ~jiffie before touching IRQs */ > + if (i915_spin_request(req, state, 5)) > + goto complete; > + > + set_current_state(state); > + add_wait_queue(&req->i915->gpu_error.wait_queue, &reset); > + > + intel_wait_init(&wait, req->seqno); > + if (intel_engine_add_wait(req->engine, &wait)) > + /* In order to check that we haven't missed the interrupt > + * as we enabled it, we need to kick ourselves to do a > + * coherent check on the seqno before we sleep. > + */ > + goto wakeup; > + > + for (;;) { > + if (signal_pending_state(state, current)) { > + ret = -ERESTARTSYS; > + break; > + } > + > + timeout_remain = io_schedule_timeout(timeout_remain); > + if (timeout_remain == 0) { > + ret = -ETIME; > + break; > + } > + > + if (intel_wait_complete(&wait)) > + break; > + > + set_current_state(state); > + > +wakeup: > + /* Carefully check if the request is complete, giving time > + * for the seqno to be visible following the interrupt. > + * We also have to check in case we are kicked by the GPU > + * reset in order to drop the struct_mutex. > + */ > + if (__i915_request_irq_complete(req)) > + break; > + > + /* Only spin if we know the GPU is processing this request */ > + if (i915_spin_request(req, state, 2)) > + break; > + } > + remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset); > + > + intel_engine_remove_wait(req->engine, &wait); > + __set_current_state(TASK_RUNNING); > +complete: > + trace_i915_gem_request_wait_end(req); > + > + if (timeout) { > + *timeout -= ktime_get_raw_ns(); > + if (*timeout < 0) > + *timeout = 0; > + > + /* > + * Apparently ktime isn't accurate enough and occasionally has a > + * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch > + * things up to make the test happy. We allow up to 1 jiffy. > + * > + * This is a regrssion from the timespec->ktime conversion. > + */ > + if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000) > + *timeout = 0; > + } > + > + if (rps && req->seqno == req->engine->last_submitted_seqno) { > + /* The GPU is now idle and this client has stalled. > + * Since no other client has submitted a request in the > + * meantime, assume that this client is the only one > + * supplying work to the GPU but is unable to keep that > + * work supplied because it is waiting. Since the GPU is > + * then never kept fully busy, RPS autoclocking will > + * keep the clocks relatively low, causing further delays. > + * Compensate by giving the synchronous client credit for > + * a waitboost next time. > + */ > + spin_lock(&req->i915->rps.client_lock); > + list_del_init(&rps->link); > + spin_unlock(&req->i915->rps.client_lock); > + } > + > + return ret; > +} > + > +/** > + * Waits for a request to be signaled, and cleans up the > + * request and object lists appropriately for that event. > + */ > +int i915_wait_request(struct drm_i915_gem_request *req) > +{ > + int ret; > + > + GEM_BUG_ON(!req); > + lockdep_assert_held(&req->i915->drm.struct_mutex); > + > + ret = __i915_wait_request(req, req->i915->mm.interruptible, NULL, NULL); > + if (ret) > + return ret; > + > + /* If the GPU hung, we want to keep the requests to find the guilty. 
*/ > + if (!i915_reset_in_progress(&req->i915->gpu_error)) > + i915_gem_request_retire_upto(req); > + > + return 0; > +} > + > +void i915_gem_request_free(struct kref *req_ref) > +{ > + struct drm_i915_gem_request *req = > + container_of(req_ref, typeof(*req), ref); > + kmem_cache_free(req->i915->requests, req); > +} > diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h > new file mode 100644 > index 000000000000..ea700befcc28 > --- /dev/null > +++ b/drivers/gpu/drm/i915/i915_gem_request.h > @@ -0,0 +1,238 @@ > +/* > + * Copyright © 2008-2015 Intel Corporation > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice (including the next > + * paragraph) shall be included in all copies or substantial portions of the > + * Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS > + * IN THE SOFTWARE. > + * > + */ > + > +#ifndef I915_GEM_REQUEST_H > +#define I915_GEM_REQUEST_H > + > +/** > + * Request queue structure. > + * > + * The request queue allows us to note sequence numbers that have been emitted > + * and may be associated with active buffers to be retired. > + * > + * By keeping this list, we can avoid having to do questionable sequence > + * number comparisons on buffer last_read|write_seqno. It also allows an > + * emission time to be associated with the request for tracking how far ahead > + * of the GPU the submission is. > + * > + * The requests are reference counted, so upon creation they should have an > + * initial reference taken using kref_init > + */ > +struct drm_i915_gem_request { > + struct kref ref; > + > + /** On Which ring this request was generated */ > + struct drm_i915_private *i915; > + > + /** > + * Context and ring buffer related to this request > + * Contexts are refcounted, so when this request is associated with a > + * context, we must increment the context's refcount, to guarantee that > + * it persists while any request is linked to it. Requests themselves > + * are also refcounted, so the request will only be freed when the last > + * reference to it is dismissed, and the code in > + * i915_gem_request_free() will then decrement the refcount on the > + * context. > + */ > + struct i915_gem_context *ctx; > + struct intel_engine_cs *engine; > + struct intel_ringbuffer *ringbuf; > + struct intel_signal_node signaling; > + > + /** GEM sequence number associated with the previous request, > + * when the HWS breadcrumb is equal to this the GPU is processing > + * this request. 
> + */ > + u32 previous_seqno; > + > + /** GEM sequence number associated with this request, > + * when the HWS breadcrumb is equal or greater than this the GPU > + * has finished processing this request. > + */ > + u32 seqno; > + > + /** Position in the ringbuffer of the start of the request */ > + u32 head; > + > + /** > + * Position in the ringbuffer of the start of the postfix. > + * This is required to calculate the maximum available ringbuffer > + * space without overwriting the postfix. > + */ > + u32 postfix; > + > + /** Position in the ringbuffer of the end of the whole request */ > + u32 tail; > + > + /** Preallocate space in the ringbuffer for the emitting the request */ > + u32 reserved_space; > + > + /** > + * Context related to the previous request. > + * As the contexts are accessed by the hardware until the switch is > + * completed to a new context, the hardware may still be writing > + * to the context object after the breadcrumb is visible. We must > + * not unpin/unbind/prune that object whilst still active and so > + * we keep the previous context pinned until the following (this) > + * request is retired. > + */ > + struct i915_gem_context *previous_context; > + > + /** Batch buffer related to this request if any (used for > + * error state dump only). > + */ > + struct drm_i915_gem_object *batch_obj; > + > + /** Time at which this request was emitted, in jiffies. */ > + unsigned long emitted_jiffies; > + > + /** global list entry for this request */ > + struct list_head list; > + > + struct drm_i915_file_private *file_priv; > + /** file_priv list entry for this request */ > + struct list_head client_list; > + > + /** process identifier submitting this request */ > + struct pid *pid; > + > + /** > + * The ELSP only accepts two elements at a time, so we queue > + * context/tail pairs on a given queue (ring->execlist_queue) until the > + * hardware is available. The queue serves a double purpose: we also use > + * it to keep track of the up to 2 contexts currently in the hardware > + * (usually one in execution and the other queued up by the GPU): We > + * only remove elements from the head of the queue when the hardware > + * informs us that an element has been completed. > + * > + * All accesses to the queue are mediated by a spinlock > + * (ring->execlist_lock). > + */ > + > + /** Execlist link in the submission queue.*/ > + struct list_head execlist_link; > + > + /** Execlists no. of times this request has been sent to the ELSP */ > + int elsp_submitted; > + > + /** Execlists context hardware id. */ > + unsigned int ctx_hw_id; > +}; > + > +struct drm_i915_gem_request * __must_check > +i915_gem_request_alloc(struct intel_engine_cs *engine, > + struct i915_gem_context *ctx); > +void i915_gem_request_free(struct kref *req_ref); > +int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, > + struct drm_file *file); > +void i915_gem_request_retire_upto(struct drm_i915_gem_request *req); > + > +static inline u32 > +i915_gem_request_get_seqno(struct drm_i915_gem_request *req) > +{ > + return req ? req->seqno : 0; > +} > + > +static inline struct intel_engine_cs * > +i915_gem_request_get_engine(struct drm_i915_gem_request *req) > +{ > + return req ? 
req->engine : NULL; > +} > + > +static inline struct drm_i915_gem_request * > +i915_gem_request_reference(struct drm_i915_gem_request *req) > +{ > + if (req) > + kref_get(&req->ref); > + return req; > +} > + > +static inline void > +i915_gem_request_unreference(struct drm_i915_gem_request *req) > +{ > + kref_put(&req->ref, i915_gem_request_free); > +} > + > +static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst, > + struct drm_i915_gem_request *src) > +{ > + if (src) > + i915_gem_request_reference(src); > + > + if (*pdst) > + i915_gem_request_unreference(*pdst); > + > + *pdst = src; > +} > + > +void __i915_add_request(struct drm_i915_gem_request *req, > + struct drm_i915_gem_object *batch_obj, > + bool flush_caches); > +#define i915_add_request(req) \ > + __i915_add_request(req, NULL, true) > +#define i915_add_request_no_flush(req) \ > + __i915_add_request(req, NULL, false) > + > +struct intel_rps_client; > + > +int __i915_wait_request(struct drm_i915_gem_request *req, > + bool interruptible, > + s64 *timeout, > + struct intel_rps_client *rps); > +int __must_check i915_wait_request(struct drm_i915_gem_request *req); > + > +static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine); > + > +/** > + * Returns true if seq1 is later than seq2. > + */ > +static inline bool i915_seqno_passed(u32 seq1, u32 seq2) > +{ > + return (s32)(seq1 - seq2) >= 0; > +} > + > +static inline bool > +i915_gem_request_started(const struct drm_i915_gem_request *req) > +{ > + return i915_seqno_passed(intel_engine_get_seqno(req->engine), > + req->previous_seqno); > +} > + > +static inline bool > +i915_gem_request_completed(const struct drm_i915_gem_request *req) > +{ > + return i915_seqno_passed(intel_engine_get_seqno(req->engine), > + req->seqno); > +} > + > +bool __i915_spin_request(const struct drm_i915_gem_request *request, > + int state, unsigned long timeout_us); > +static inline bool i915_spin_request(const struct drm_i915_gem_request *request, > + int state, unsigned long timeout_us) > +{ > + return (i915_gem_request_started(request) && > + __i915_spin_request(request, state, timeout_us)); > +} > + > +#endif /* I915_GEM_REQUEST_H */ > -- > 2.8.1 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > https://lists.freedesktop.org/mailman/listinfo/intel-gfx _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx
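
A note on the seqno handling that moves into i915_gem_request.h: i915_seqno_passed() orders two u32 sequence numbers across wraparound by looking at the sign of their difference. Below is a minimal, self-contained userspace sketch of the same idiom; it is plain C written for illustration, not code from the patch, and the names (seqno_passed, the test values) are invented for the example.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* True if seq1 was emitted at or after seq2, even across a u32
 * wraparound, by interpreting the unsigned difference as signed.
 * This mirrors the i915_seqno_passed() helper quoted above and is
 * valid as long as the two values are less than 2^31 apart.
 */
static bool seqno_passed(uint32_t seq1, uint32_t seq2)
{
	return (int32_t)(seq1 - seq2) >= 0;
}

int main(void)
{
	/* Ordinary ordering. */
	assert(seqno_passed(100, 50));
	assert(!seqno_passed(50, 100));

	/* Across the wrap: 5 was emitted after 0xfffffff0. */
	assert(seqno_passed(5, 0xfffffff0u));
	assert(!seqno_passed(0xfffffff0u, 5));

	return 0;
}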
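
The wait path keeps the busy-wait-then-sleep split: __i915_spin_request() polls the completion check for a few microseconds (bounded by a cheap clock, pending signals and need_resched()), and __i915_wait_request() only then sets up the interrupt-driven sleep. The sketch below is a rough userspace analogue of that shape using pthreads in place of the breadcrumb/irq machinery; everything in it (clock_us, spin_wait, producer, the 5 microsecond budget) is illustrative and not taken from the driver. Build with -pthread.

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

static atomic_bool done;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;

/* Cheap monotonic clock in microseconds, standing in for local_clock_us(). */
static unsigned long clock_us(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec * 1000000ul + ts.tv_nsec / 1000;
}

/* Poll the completion flag until the time budget runs out. */
static bool spin_wait(unsigned long timeout_us)
{
	unsigned long end = clock_us() + timeout_us;

	do {
		if (atomic_load(&done))
			return true;
	} while (clock_us() < end);

	return false;
}

static void *producer(void *arg)
{
	(void)arg;
	usleep(2000);	/* pretend the "GPU" takes 2ms to finish */
	pthread_mutex_lock(&lock);
	atomic_store(&done, true);
	pthread_cond_broadcast(&cond);
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t thread;

	pthread_create(&thread, NULL, producer, NULL);

	/* Optimistic spin for ~5us before paying for a real sleep, the
	 * same ordering as the i915_spin_request(req, state, 5) call
	 * ahead of the irq wait in __i915_wait_request().
	 */
	if (!spin_wait(5)) {
		pthread_mutex_lock(&lock);
		while (!atomic_load(&done))
			pthread_cond_wait(&cond, &lock);
		pthread_mutex_unlock(&lock);
	}

	printf("request completed\n");
	pthread_join(thread, NULL);
	return 0;
}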
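
One more idiom worth noting from the header: i915_gem_request_assign() takes the reference on the incoming request before dropping the one held in the destination slot, so assigning a slot to the request it already points at cannot free the object underneath the caller. A generic toy-refcount sketch of that ordering follows; obj, obj_get, obj_put and obj_assign are invented for the example and only mirror the shape of the kref-based helpers in the patch.

#include <assert.h>
#include <stdlib.h>

struct obj {
	int refcount;
};

static struct obj *obj_get(struct obj *o)
{
	if (o)
		o->refcount++;
	return o;
}

static void obj_put(struct obj *o)
{
	if (o && --o->refcount == 0)
		free(o);
}

/* Reference the new pointer first, then release the old one, the same
 * ordering used by i915_gem_request_assign(). With the order reversed,
 * a self-assignment could drop the last reference before re-taking it.
 */
static void obj_assign(struct obj **pdst, struct obj *src)
{
	if (src)
		obj_get(src);
	if (*pdst)
		obj_put(*pdst);
	*pdst = src;
}

int main(void)
{
	struct obj *o = calloc(1, sizeof(*o));
	struct obj *slot = NULL;

	if (!o)
		return 1;

	o->refcount = 1;		/* reference held by this function */
	obj_assign(&slot, o);		/* slot takes its own reference */
	obj_assign(&slot, o);		/* self-assignment stays safe */
	assert(o->refcount == 2);

	obj_assign(&slot, NULL);	/* release the slot */
	obj_put(o);			/* release the original reference */
	return 0;
}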