On ke, 2016-07-20 at 14:12 +0100, Chris Wilson wrote: > If is simpler and leads to more readable code through the callstack if > the allocation returns the allocated struct through the return value. > > The importance of this is that it no longer looks like we accidentally > allocate requests as side-effect of calling certain functions. > > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_drv.h | 3 +- > drivers/gpu/drm/i915/i915_gem.c | 75 ++++++++---------------------- > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 12 ++--- > drivers/gpu/drm/i915/i915_gem_request.c | 58 ++++++++--------------- > drivers/gpu/drm/i915/i915_trace.h | 13 +++--- > drivers/gpu/drm/i915/intel_display.c | 36 ++++++-------- > drivers/gpu/drm/i915/intel_lrc.c | 2 +- > drivers/gpu/drm/i915/intel_overlay.c | 20 ++++---- > 8 files changed, 79 insertions(+), 140 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index f32ec6db5bfa..3f67431577e3 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -3168,8 +3168,7 @@ static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj) > > int __must_check i915_mutex_lock_interruptible(struct drm_device *dev); > int i915_gem_object_sync(struct drm_i915_gem_object *obj, > - struct intel_engine_cs *to, > - struct drm_i915_gem_request **to_req); > + struct drm_i915_gem_request *to); > void i915_vma_move_to_active(struct i915_vma *vma, > struct drm_i915_gem_request *req); > int i915_gem_dumb_create(struct drm_file *file_priv, > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 95dbcfd94a80..77d7c0b012f4 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -2842,51 +2842,35 @@ out: > > static int > __i915_gem_object_sync(struct drm_i915_gem_object *obj, > - struct intel_engine_cs *to, > - struct drm_i915_gem_request *from_req, > - struct drm_i915_gem_request **to_req) > + struct drm_i915_gem_request *to, > + struct drm_i915_gem_request *from) > { > - struct intel_engine_cs *from; > int ret; > > - from = i915_gem_request_get_engine(from_req); > - if (to == from) > + if (to->engine == from->engine) > return 0; > > - if (i915_gem_request_completed(from_req)) > + if (i915_gem_request_completed(from)) > return 0; > > if (!i915.semaphores) { > - struct drm_i915_private *i915 = to_i915(obj->base.dev); > - ret = __i915_wait_request(from_req, > - i915->mm.interruptible, > + ret = __i915_wait_request(from, > + from->i915->mm.interruptible, > NULL, > NO_WAITBOOST); > if (ret) > return ret; > > - i915_gem_object_retire_request(obj, from_req); > + i915_gem_object_retire_request(obj, from); > } else { > - int idx = intel_engine_sync_index(from, to); > - u32 seqno = i915_gem_request_get_seqno(from_req); > + int idx = intel_engine_sync_index(from->engine, to->engine); > + u32 seqno = i915_gem_request_get_seqno(from); > > - WARN_ON(!to_req); > - > - if (seqno <= from->semaphore.sync_seqno[idx]) > + if (seqno <= from->engine->semaphore.sync_seqno[idx]) > return 0; > > - if (*to_req == NULL) { > - struct drm_i915_gem_request *req; > - > - req = i915_gem_request_alloc(to, NULL); > - if (IS_ERR(req)) > - return PTR_ERR(req); > - > - *to_req = req; > - } > - > - trace_i915_gem_ring_sync_to(*to_req, from, from_req); > - ret = to->semaphore.sync_to(*to_req, from, seqno); > + trace_i915_gem_ring_sync_to(to, from); Will somebody go nuts for changing the tracing just like so? I remember somebody treating it somewhat of an ABI. > + ret = to->engine->semaphore.sync_to(to, from->engine, seqno); > if (ret) > return ret; > > @@ -2894,8 +2878,8 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj, > * might have just caused seqno wrap under > * the radar. > */ > - from->semaphore.sync_seqno[idx] = > - i915_gem_request_get_seqno(obj->last_read_req[from->id]); > + from->engine->semaphore.sync_seqno[idx] = > + i915_gem_request_get_seqno(obj->last_read_req[from->engine->id]); > } > > return 0; > @@ -2905,17 +2889,12 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj, > * i915_gem_object_sync - sync an object to a ring. > * > * @obj: object which may be in use on another ring. > - * @to: ring we wish to use the object on. May be NULL. > - * @to_req: request we wish to use the object for. See below. > - * This will be allocated and returned if a request is > - * required but not passed in. > + * @to: request we are wishing to use > * > * This code is meant to abstract object synchronization with the GPU. > - * Calling with NULL implies synchronizing the object with the CPU > - * rather than a particular GPU ring. Conceptually we serialise writes > - * between engines inside the GPU. We only allow one engine to write > - * into a buffer at any time, but multiple readers. To ensure each has > - * a coherent view of memory, we must: > + * Conceptually we serialise writes between engines inside the GPU. > + * We only allow one engine to write into a buffer at any time, but > + * multiple readers. To ensure each has a coherent view of memory, we must: > * > * - If there is an outstanding write request to the object, the new > * request must wait for it to complete (either CPU or in hw, requests > @@ -2924,22 +2903,11 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj, > * - If we are a write request (pending_write_domain is set), the new > * request must wait for outstanding read requests to complete. > * > - * For CPU synchronisation (NULL to) no request is required. For syncing with > - * rings to_req must be non-NULL. However, a request does not have to be > - * pre-allocated. If *to_req is NULL and sync commands will be emitted then a > - * request will be allocated automatically and returned through *to_req. Note > - * that it is not guaranteed that commands will be emitted (because the system > - * might already be idle). Hence there is no need to create a request that > - * might never have any work submitted. Note further that if a request is > - * returned in *to_req, it is the responsibility of the caller to submit > - * that request (after potentially adding more work to it). > - * > * Returns 0 if successful, else propagates up the lower layer error. > */ > int > i915_gem_object_sync(struct drm_i915_gem_object *obj, > - struct intel_engine_cs *to, > - struct drm_i915_gem_request **to_req) > + struct drm_i915_gem_request *to) > { > const bool readonly = obj->base.pending_write_domain == 0; > struct drm_i915_gem_request *req[I915_NUM_ENGINES]; > @@ -2948,9 +2916,6 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj, > if (!obj->active) > return 0; > > - if (to == NULL) > - return i915_gem_object_wait_rendering(obj, readonly); > - > n = 0; > if (readonly) { > if (obj->last_write_req) > @@ -2961,7 +2926,7 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj, > req[n++] = obj->last_read_req[i]; > } > for (i = 0; i < n; i++) { > - ret = __i915_gem_object_sync(obj, to, req[i], to_req); > + ret = __i915_gem_object_sync(obj, to, req[i]); > if (ret) > return ret; > } > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > index 2a4841256f8e..5cea95c6f98b 100644 > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > @@ -981,7 +981,7 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, > struct drm_i915_gem_object *obj = vma->obj; > > if (obj->active & other_rings) { > - ret = i915_gem_object_sync(obj, req->engine, &req); > + ret = i915_gem_object_sync(obj, req); > if (ret) > return ret; > } > @@ -1426,7 +1426,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, > { > struct drm_i915_private *dev_priv = to_i915(dev); > struct i915_ggtt *ggtt = &dev_priv->ggtt; > - struct drm_i915_gem_request *req = NULL; > struct eb_vmas *eb; > struct drm_i915_gem_object *batch_obj; > struct drm_i915_gem_exec_object2 shadow_exec_entry; > @@ -1614,13 +1613,13 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, > params->batch_obj_vm_offset = i915_gem_obj_offset(batch_obj, vm); > > /* Allocate a request for this batch buffer nice and early. */ > - req = i915_gem_request_alloc(engine, ctx); > - if (IS_ERR(req)) { > - ret = PTR_ERR(req); > + params->request = i915_gem_request_alloc(engine, ctx); > + if (IS_ERR(params->request)) { > + ret = PTR_ERR(params->request); > goto err_batch_unpin; > } > > - ret = i915_gem_request_add_to_client(req, file); > + ret = i915_gem_request_add_to_client(params->request, file); > if (ret) > goto err_request; > > @@ -1636,7 +1635,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, > params->dispatch_flags = dispatch_flags; > params->batch_obj = batch_obj; > params->ctx = ctx; > - params->request = req; Not sure why you especially want to always use params->request form? > > ret = dev_priv->gt.execbuf_submit(params, args, &eb->vmas); > err_request: > diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c > index 3a566abf5219..2153b4fe4a1f 100644 > --- a/drivers/gpu/drm/i915/i915_gem_request.c > +++ b/drivers/gpu/drm/i915/i915_gem_request.c > @@ -292,10 +292,21 @@ static int i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno) > return 0; > } > > -static inline int > -__i915_gem_request_alloc(struct intel_engine_cs *engine, > - struct i915_gem_context *ctx, > - struct drm_i915_gem_request **req_out) > +/** > + * i915_gem_request_alloc - allocate a request structure > + * > + * @engine: engine that we wish to issue the request on. > + * @ctx: context that the request will be associated with. > + * This can be NULL if the request is not directly related to > + * any specific user context, in which case this function will > + * choose an appropriate context to use. > + * > + * Returns a pointer to the allocated request if successful, > + * or an error code if not. > + */ > +struct drm_i915_gem_request * > +i915_gem_request_alloc(struct intel_engine_cs *engine, > + struct i915_gem_context *ctx) > { > struct drm_i915_private *dev_priv = engine->i915; > unsigned int reset_counter = i915_reset_counter(&dev_priv->gpu_error); > @@ -303,18 +314,13 @@ __i915_gem_request_alloc(struct intel_engine_cs *engine, > u32 seqno; > int ret; > > - if (!req_out) > - return -EINVAL; > - > - *req_out = NULL; > - > /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report > * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex > * and restart. > */ > ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible); > if (ret) > - return ret; > + return ERR_PTR(ret); > > /* Move the oldest request to the slab-cache (if not in use!) */ > if (!list_empty(&engine->request_list)) { > @@ -326,7 +332,7 @@ __i915_gem_request_alloc(struct intel_engine_cs *engine, > > req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL); > if (!req) > - return -ENOMEM; > + return ERR_PTR(-ENOMEM); > > ret = i915_gem_get_seqno(dev_priv, &seqno); > if (ret) > @@ -359,39 +365,13 @@ __i915_gem_request_alloc(struct intel_engine_cs *engine, > if (ret) > goto err_ctx; > > - *req_out = req; > - return 0; > + return req; > > err_ctx: > i915_gem_context_put(ctx); > err: > kmem_cache_free(dev_priv->requests, req); > - return ret; > -} > - > -/** > - * i915_gem_request_alloc - allocate a request structure > - * > - * @engine: engine that we wish to issue the request on. > - * @ctx: context that the request will be associated with. > - * This can be NULL if the request is not directly related to > - * any specific user context, in which case this function will > - * choose an appropriate context to use. > - * > - * Returns a pointer to the allocated request if successful, > - * or an error code if not. > - */ > -struct drm_i915_gem_request * > -i915_gem_request_alloc(struct intel_engine_cs *engine, > - struct i915_gem_context *ctx) > -{ > - struct drm_i915_gem_request *req; > - int err; > - > - if (!ctx) > - ctx = engine->i915->kernel_context; > - err = __i915_gem_request_alloc(engine, ctx, &req); > - return err ? ERR_PTR(err) : req; > + return ERR_PTR(ret); > } > > static void i915_gem_mark_busy(const struct intel_engine_cs *engine) > diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h > index 007112d1e049..9e43c0aa6e3b 100644 > --- a/drivers/gpu/drm/i915/i915_trace.h > +++ b/drivers/gpu/drm/i915/i915_trace.h > @@ -449,10 +449,9 @@ TRACE_EVENT(i915_gem_evict_vm, > ); > > TRACE_EVENT(i915_gem_ring_sync_to, > - TP_PROTO(struct drm_i915_gem_request *to_req, > - struct intel_engine_cs *from, > - struct drm_i915_gem_request *req), > - TP_ARGS(to_req, from, req), > + TP_PROTO(struct drm_i915_gem_request *to, > + struct drm_i915_gem_request *from), > + TP_ARGS(to, from), > > TP_STRUCT__entry( > __field(u32, dev) > @@ -463,9 +462,9 @@ TRACE_EVENT(i915_gem_ring_sync_to, > > TP_fast_assign( > __entry->dev = from->i915->drm.primary->index; > - __entry->sync_from = from->id; > - __entry->sync_to = to_req->engine->id; > - __entry->seqno = req->fence.seqno; > + __entry->sync_from = from->engine->id; > + __entry->sync_to = to->engine->id; > + __entry->seqno = from->fence.seqno; > ), > > TP_printk("dev=%u, sync-from=%u, sync-to=%u, seqno=%u", > diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c > index bff172c45ff7..5d4420b67642 100644 > --- a/drivers/gpu/drm/i915/intel_display.c > +++ b/drivers/gpu/drm/i915/intel_display.c > @@ -11583,7 +11583,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, > struct intel_flip_work *work; > struct intel_engine_cs *engine; > bool mmio_flip; > - struct drm_i915_gem_request *request = NULL; > + struct drm_i915_gem_request *request; > int ret; > > /* > @@ -11690,22 +11690,6 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, > > mmio_flip = use_mmio_flip(engine, obj); > > - /* When using CS flips, we want to emit semaphores between rings. > - * However, when using mmio flips we will create a task to do the > - * synchronisation, so all we want here is to pin the framebuffer > - * into the display plane and skip any waits. > - */ > - if (!mmio_flip) { > - ret = i915_gem_object_sync(obj, engine, &request); > - if (!ret && !request) { > - request = i915_gem_request_alloc(engine, NULL); > - ret = PTR_ERR_OR_ZERO(request); > - } > - > - if (ret) > - goto cleanup_pending; > - } > - > ret = intel_pin_and_fence_fb_obj(fb, primary->state->rotation); > if (ret) > goto cleanup_pending; > @@ -11723,14 +11707,24 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, > > schedule_work(&work->mmio_work); > } else { > - i915_gem_request_assign(&work->flip_queued_req, request); > + request = i915_gem_request_alloc(engine, engine->last_context); > + if (IS_ERR(request)) { > + ret = PTR_ERR(request); > + goto cleanup_unpin; > + } > + > + ret = i915_gem_object_sync(obj, request); > + if (ret) > + goto cleanup_request; > + > ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, request, > page_flip_flags); > if (ret) > - goto cleanup_unpin; > + goto cleanup_request; > > intel_mark_page_flip_active(intel_crtc, work); > > + work->flip_queued_req = i915_gem_request_get(request); If I understood it correctly, result should be equivalent, no functional changes. Regards, Joonas > i915_add_request_no_flush(request); > } > > @@ -11745,11 +11739,11 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, > > return 0; > > +cleanup_request: > + i915_add_request_no_flush(request); > cleanup_unpin: > intel_unpin_fb_obj(fb, crtc->primary->state->rotation); > cleanup_pending: > - if (!IS_ERR_OR_NULL(request)) > - i915_add_request_no_flush(request); > atomic_dec(&intel_crtc->unpin_work_count); > mutex_unlock(&dev->struct_mutex); > cleanup: > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > index 3158a1a38644..6cd0e24ed50c 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -655,7 +655,7 @@ static int execlists_move_to_gpu(struct drm_i915_gem_request *req, > struct drm_i915_gem_object *obj = vma->obj; > > if (obj->active & other_rings) { > - ret = i915_gem_object_sync(obj, req->engine, &req); > + ret = i915_gem_object_sync(obj, req); > if (ret) > return ret; > } > diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c > index a5071e281088..356a1f6f95aa 100644 > --- a/drivers/gpu/drm/i915/intel_overlay.c > +++ b/drivers/gpu/drm/i915/intel_overlay.c > @@ -229,11 +229,18 @@ static int intel_overlay_do_wait_request(struct intel_overlay *overlay, > return 0; > } > > +static struct drm_i915_gem_request *alloc_request(struct intel_overlay *overlay) > +{ > + struct drm_i915_private *dev_priv = overlay->i915; > + struct intel_engine_cs *engine = &dev_priv->engine[RCS]; > + > + return i915_gem_request_alloc(engine, dev_priv->kernel_context); > +} > + > /* overlay needs to be disable in OCMD reg */ > static int intel_overlay_on(struct intel_overlay *overlay) > { > struct drm_i915_private *dev_priv = overlay->i915; > - struct intel_engine_cs *engine = &dev_priv->engine[RCS]; > struct drm_i915_gem_request *req; > struct intel_ring *ring; > int ret; > @@ -241,7 +248,7 @@ static int intel_overlay_on(struct intel_overlay *overlay) > WARN_ON(overlay->active); > WARN_ON(IS_I830(dev_priv) && !(dev_priv->quirks & QUIRK_PIPEA_FORCE)); > > - req = i915_gem_request_alloc(engine, NULL); > + req = alloc_request(overlay); > if (IS_ERR(req)) > return PTR_ERR(req); > > @@ -268,7 +275,6 @@ static int intel_overlay_continue(struct intel_overlay *overlay, > bool load_polyphase_filter) > { > struct drm_i915_private *dev_priv = overlay->i915; > - struct intel_engine_cs *engine = &dev_priv->engine[RCS]; > struct drm_i915_gem_request *req; > struct intel_ring *ring; > u32 flip_addr = overlay->flip_addr; > @@ -285,7 +291,7 @@ static int intel_overlay_continue(struct intel_overlay *overlay, > if (tmp & (1 << 17)) > DRM_DEBUG("overlay underrun, DOVSTA: %x\n", tmp); > > - req = i915_gem_request_alloc(engine, NULL); > + req = alloc_request(overlay); > if (IS_ERR(req)) > return PTR_ERR(req); > > @@ -338,7 +344,6 @@ static void intel_overlay_off_tail(struct intel_overlay *overlay) > static int intel_overlay_off(struct intel_overlay *overlay) > { > struct drm_i915_private *dev_priv = overlay->i915; > - struct intel_engine_cs *engine = &dev_priv->engine[RCS]; > struct drm_i915_gem_request *req; > struct intel_ring *ring; > u32 flip_addr = overlay->flip_addr; > @@ -352,7 +357,7 @@ static int intel_overlay_off(struct intel_overlay *overlay) > * of the hw. Do it in both cases */ > flip_addr |= OFC_UPDATE; > > - req = i915_gem_request_alloc(engine, NULL); > + req = alloc_request(overlay); > if (IS_ERR(req)) > return PTR_ERR(req); > > @@ -412,7 +417,6 @@ static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay) > static int intel_overlay_release_old_vid(struct intel_overlay *overlay) > { > struct drm_i915_private *dev_priv = overlay->i915; > - struct intel_engine_cs *engine = &dev_priv->engine[RCS]; > int ret; > > lockdep_assert_held(&dev_priv->drm.struct_mutex); > @@ -428,7 +432,7 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay) > struct drm_i915_gem_request *req; > struct intel_ring *ring; > > - req = i915_gem_request_alloc(engine, NULL); > + req = alloc_request(overlay); > if (IS_ERR(req)) > return PTR_ERR(req); > -- Joonas Lahtinen Open Source Technology Center Intel Corporation _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx