On 12/11/2015 05:11 AM, John.C.Harrison@xxxxxxxxx wrote: > From: John Harrison <John.C.Harrison@xxxxxxxxx> > > The fence object used inside the request structure requires a sequence > number. Although this is not used by the i915 driver itself, it could > potentially be used by non-i915 code if the fence is passed outside of > the driver. This is the intention as it allows external kernel drivers > and user applications to wait on batch buffer completion > asynchronously via the dma-buf fence API. > > To ensure that such external users are not confused by strange things > happening with the seqno, this patch adds in a per context timeline > that can provide a guaranteed in-order seqno value for the fence. This > is safe because the scheduler will not re-order batch buffers within a > context - they are considered to be mutually dependent. > > v2: New patch in series. > > v3: Renamed/retyped timeline structure fields after review comments by > Tvrtko Ursulin. > > Added context information to the timeline's name string for better > identification in debugfs output. 
> > For: VIZ-5190 > Signed-off-by: John Harrison <John.C.Harrison@xxxxxxxxx> > Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_drv.h | 25 ++++++++--- > drivers/gpu/drm/i915/i915_gem.c | 80 +++++++++++++++++++++++++++++---- > drivers/gpu/drm/i915/i915_gem_context.c | 15 ++++++- > drivers/gpu/drm/i915/intel_lrc.c | 8 ++++ > drivers/gpu/drm/i915/intel_ringbuffer.h | 1 - > 5 files changed, 111 insertions(+), 18 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index caf7897..7d6a7c0 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -841,6 +841,15 @@ struct i915_ctx_hang_stats { > bool banned; > }; > > +struct i915_fence_timeline { > + char name[32]; > + unsigned fence_context; > + unsigned next; > + > + struct intel_context *ctx; > + struct intel_engine_cs *ring; > +}; > + > /* This must match up with the value previously used for execbuf2.rsvd1. */ > #define DEFAULT_CONTEXT_HANDLE 0 > > @@ -885,6 +894,7 @@ struct intel_context { > struct drm_i915_gem_object *state; > struct intel_ringbuffer *ringbuf; > int pin_count; > + struct i915_fence_timeline fence_timeline; > } engine[I915_NUM_RINGS]; > > struct list_head link; > @@ -2177,13 +2187,10 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old, > struct drm_i915_gem_request { > /** > * Underlying object for implementing the signal/wait stuff. > - * NB: Never call fence_later() or return this fence object to user > - * land! Due to lazy allocation, scheduler re-ordering, pre-emption, > - * etc., there is no guarantee at all about the validity or > - * sequentiality of the fence's seqno! It is also unsafe to let > - * anything outside of the i915 driver get hold of the fence object > - * as the clean up when decrementing the reference count requires > - * holding the driver mutex lock. > + * NB: Never return this fence object to user land! 
It is unsafe to > + * let anything outside of the i915 driver get hold of the fence > + * object as the clean up when decrementing the reference count > + * requires holding the driver mutex lock. > */ > struct fence fence; > > @@ -2263,6 +2270,10 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring, > struct drm_i915_gem_request **req_out); > void i915_gem_request_cancel(struct drm_i915_gem_request *req); > > +int i915_create_fence_timeline(struct drm_device *dev, > + struct intel_context *ctx, > + struct intel_engine_cs *ring); > + > static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req) > { > return fence_is_signaled(&req->fence); > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 0801738..7a37fb7 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -2665,9 +2665,32 @@ static const char *i915_gem_request_get_driver_name(struct fence *req_fence) > > static const char *i915_gem_request_get_timeline_name(struct fence *req_fence) > { > - struct drm_i915_gem_request *req = container_of(req_fence, > - typeof(*req), fence); > - return req->ring->name; > + struct drm_i915_gem_request *req; > + struct i915_fence_timeline *timeline; > + > + req = container_of(req_fence, typeof(*req), fence); > + timeline = &req->ctx->engine[req->ring->id].fence_timeline; > + > + return timeline->name; > +} > + > +static void i915_gem_request_timeline_value_str(struct fence *req_fence, char *str, int size) > +{ > + struct drm_i915_gem_request *req; > + > + req = container_of(req_fence, typeof(*req), fence); > + > + /* Last signalled timeline value ??? */ > + snprintf(str, size, "? 
[%d]"/*, timeline->value*/, req->ring->get_seqno(req->ring, true)); > +} > + > +static void i915_gem_request_fence_value_str(struct fence *req_fence, char *str, int size) > +{ > + struct drm_i915_gem_request *req; > + > + req = container_of(req_fence, typeof(*req), fence); > + > + snprintf(str, size, "%d [%d]", req->fence.seqno, req->seqno); > } > > static const struct fence_ops i915_gem_request_fops = { > @@ -2677,8 +2700,49 @@ static const struct fence_ops i915_gem_request_fops = { > .release = i915_gem_request_free, > .get_driver_name = i915_gem_request_get_driver_name, > .get_timeline_name = i915_gem_request_get_timeline_name, > + .fence_value_str = i915_gem_request_fence_value_str, > + .timeline_value_str = i915_gem_request_timeline_value_str, > }; > > +int i915_create_fence_timeline(struct drm_device *dev, > + struct intel_context *ctx, > + struct intel_engine_cs *ring) > +{ > + struct i915_fence_timeline *timeline; > + > + timeline = &ctx->engine[ring->id].fence_timeline; > + > + if (timeline->ring) > + return 0; > + > + timeline->fence_context = fence_context_alloc(1); > + > + /* > + * Start the timeline from seqno 0 as this is a special value > + * that is reserved for invalid sync points. 
> + */ > + timeline->next = 1; > + timeline->ctx = ctx; > + timeline->ring = ring; > + > + snprintf(timeline->name, sizeof(timeline->name), "%d>%s:%d", timeline->fence_context, ring->name, ctx->user_handle); > + > + return 0; > +} > + > +static unsigned i915_fence_timeline_get_next_seqno(struct i915_fence_timeline *timeline) > +{ > + unsigned seqno; > + > + seqno = timeline->next; > + > + /* Reserve zero for invalid */ > + if (++timeline->next == 0 ) > + timeline->next = 1; > + > + return seqno; > +} > + > int i915_gem_request_alloc(struct intel_engine_cs *ring, > struct intel_context *ctx, > struct drm_i915_gem_request **req_out) > @@ -2714,7 +2778,9 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring, > goto err; > } > > - fence_init(&req->fence, &i915_gem_request_fops, &ring->fence_lock, ring->fence_context, req->seqno); > + fence_init(&req->fence, &i915_gem_request_fops, &ring->fence_lock, > + ctx->engine[ring->id].fence_timeline.fence_context, > + i915_fence_timeline_get_next_seqno(&ctx->engine[ring->id].fence_timeline)); > > /* > * Reserve space in the ring buffer for all the commands required to > @@ -4765,7 +4831,7 @@ i915_gem_init_hw(struct drm_device *dev) > { > struct drm_i915_private *dev_priv = dev->dev_private; > struct intel_engine_cs *ring; > - int ret, i, j, fence_base; > + int ret, i, j; > > if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) > return -EIO; > @@ -4835,16 +4901,12 @@ i915_gem_init_hw(struct drm_device *dev) > if (ret) > goto out; > > - fence_base = fence_context_alloc(I915_NUM_RINGS); > - > /* Now it is safe to go back round and do everything else: */ > for_each_ring(ring, dev_priv, i) { > struct drm_i915_gem_request *req; > > WARN_ON(!ring->default_context); > > - ring->fence_context = fence_base + i; > - > ret = i915_gem_request_alloc(ring, ring->default_context, &req); > if (ret) { > i915_gem_cleanup_ringbuffer(dev); > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c > index 
43b1c73..2798ddc 100644 > --- a/drivers/gpu/drm/i915/i915_gem_context.c > +++ b/drivers/gpu/drm/i915/i915_gem_context.c > @@ -266,7 +266,7 @@ i915_gem_create_context(struct drm_device *dev, > { > const bool is_global_default_ctx = file_priv == NULL; > struct intel_context *ctx; > - int ret = 0; > + int i, ret = 0; > > BUG_ON(!mutex_is_locked(&dev->struct_mutex)); > > @@ -274,6 +274,19 @@ i915_gem_create_context(struct drm_device *dev, > if (IS_ERR(ctx)) > return ctx; > > + if (!i915.enable_execlists) { > + struct intel_engine_cs *ring; > + > + /* Create a per context timeline for fences */ > + for_each_ring(ring, to_i915(dev), i) { > + ret = i915_create_fence_timeline(dev, ctx, ring); > + if (ret) { > + DRM_ERROR("Fence timeline creation failed for legacy %s: %p\n", ring->name, ctx); > + goto err_destroy; > + } > + } > + } > + > if (is_global_default_ctx && ctx->legacy_hw_ctx.rcs_state) { > /* We may need to do things with the shrinker which > * require us to immediately switch back to the default > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > index b8c8f9b..2b56651 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -2489,6 +2489,14 @@ int intel_lr_context_deferred_alloc(struct intel_context *ctx, > goto error_ringbuf; > } > > + /* Create a per context timeline for fences */ > + ret = i915_create_fence_timeline(dev, ctx, ring); > + if (ret) { > + DRM_ERROR("Fence timeline creation failed for ring %s, ctx %p\n", > + ring->name, ctx); > + goto error_ringbuf; > + } > + > ctx->engine[ring->id].ringbuf = ringbuf; > ctx->engine[ring->id].state = ctx_obj; > > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h > index 4547645..356b6a8 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h > @@ -349,7 +349,6 @@ struct intel_engine_cs { > */ > u32 (*get_cmd_length_mask)(u32 cmd_header); > > - unsigned 
fence_context; > spinlock_t fence_lock; > }; > > Yeah we definitely want this, but it'll have to be reconciled with the different request->fence patches. I'm not sure if it would be easier to move to per-context seqnos first or go this route and deal with the mismatch between global and per-ctx. Jesse _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx