On 12/17/2015 09:43 AM, Jesse Barnes wrote: > On 12/11/2015 05:11 AM, John.C.Harrison@xxxxxxxxx wrote: >> From: John Harrison <John.C.Harrison@xxxxxxxxx> >> >> There is a construct in the linux kernel called 'struct fence' that is >> intended to keep track of work that is executed on hardware. I.e. it >> solves the basic problem that the drivers 'struct >> drm_i915_gem_request' is trying to address. The request structure does >> quite a lot more than simply track the execution progress so is very >> definitely still required. However, the basic completion status side >> could be updated to use the ready made fence implementation and gain >> all the advantages that provides. >> >> This patch makes the first step of integrating a struct fence into the >> request. It replaces the explicit reference count with that of the >> fence. It also replaces the 'is completed' test with the fence's >> equivalent. Currently, that simply chains on to the original request >> implementation. A future patch will improve this. >> >> v3: Updated after review comments by Tvrtko Ursulin. Added fence >> context/seqno pair to the debugfs request info. Renamed fence 'driver >> name' to just 'i915'. Removed BUG_ONs. >> >> For: VIZ-5190 >> Signed-off-by: John Harrison <John.C.Harrison@xxxxxxxxx> >> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> >> --- >> drivers/gpu/drm/i915/i915_debugfs.c | 5 +-- >> drivers/gpu/drm/i915/i915_drv.h | 45 +++++++++++++------------- >> drivers/gpu/drm/i915/i915_gem.c | 56 ++++++++++++++++++++++++++++++--- >> drivers/gpu/drm/i915/intel_lrc.c | 1 + >> drivers/gpu/drm/i915/intel_ringbuffer.c | 1 + >> drivers/gpu/drm/i915/intel_ringbuffer.h | 3 ++ >> 6 files changed, 81 insertions(+), 30 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c >> index 7415606..5b31186 100644 >> --- a/drivers/gpu/drm/i915/i915_debugfs.c >> +++ b/drivers/gpu/drm/i915/i915_debugfs.c >> @@ -709,11 +709,12 @@ static int i915_gem_request_info(struct seq_file *m, void *data) >> task = NULL; >> if (req->pid) >> task = pid_task(req->pid, PIDTYPE_PID); >> - seq_printf(m, " %x @ %d: %s [%d]\n", >> + seq_printf(m, " %x @ %d: %s [%d], fence = %u.%u\n", >> req->seqno, >> (int) (jiffies - req->emitted_jiffies), >> task ? task->comm : "<unknown>", >> - task ? task->pid : -1); >> + task ? task->pid : -1, >> + req->fence.context, req->fence.seqno); >> rcu_read_unlock(); >> } >> >> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h >> index 436149e..aa5cba7 100644 >> --- a/drivers/gpu/drm/i915/i915_drv.h >> +++ b/drivers/gpu/drm/i915/i915_drv.h >> @@ -51,6 +51,7 @@ >> #include <linux/kref.h> >> #include <linux/pm_qos.h> >> #include "intel_guc.h" >> +#include <linux/fence.h> >> >> /* General customization: >> */ >> @@ -2174,7 +2175,17 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old, >> * initial reference taken using kref_init >> */ >> struct drm_i915_gem_request { >> - struct kref ref; >> + /** >> + * Underlying object for implementing the signal/wait stuff. >> + * NB: Never call fence_later() or return this fence object to user >> + * land! Due to lazy allocation, scheduler re-ordering, pre-emption, >> + * etc., there is no guarantee at all about the validity or >> + * sequentiality of the fence's seqno! It is also unsafe to let >> + * anything outside of the i915 driver get hold of the fence object >> + * as the clean up when decrementing the reference count requires >> + * holding the driver mutex lock. >> + */ >> + struct fence fence; >> >> /** On Which ring this request was generated */ >> struct drm_i915_private *i915; >> @@ -2251,7 +2262,13 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring, >> struct intel_context *ctx, >> struct drm_i915_gem_request **req_out); >> void i915_gem_request_cancel(struct drm_i915_gem_request *req); >> -void i915_gem_request_free(struct kref *req_ref); >> + >> +static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req, >> + bool lazy_coherency) >> +{ >> + return fence_is_signaled(&req->fence); >> +} >> + >> int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, >> struct drm_file *file); >> >> @@ -2271,7 +2288,7 @@ static inline struct drm_i915_gem_request * >> i915_gem_request_reference(struct drm_i915_gem_request *req) >> { >> if (req) >> - kref_get(&req->ref); >> + fence_get(&req->fence); >> return req; >> } >> >> @@ -2279,7 +2296,7 @@ static inline void >> i915_gem_request_unreference(struct drm_i915_gem_request *req) >> { >> WARN_ON(!mutex_is_locked(&req->ring->dev->struct_mutex)); >> - kref_put(&req->ref, i915_gem_request_free); >> + fence_put(&req->fence); >> } >> >> static inline void >> @@ -2291,7 +2308,7 @@ i915_gem_request_unreference__unlocked(struct drm_i915_gem_request *req) >> return; >> >> dev = req->ring->dev; >> - if (kref_put_mutex(&req->ref, i915_gem_request_free, &dev->struct_mutex)) >> + if (kref_put_mutex(&req->fence.refcount, fence_release, &dev->struct_mutex)) >> mutex_unlock(&dev->struct_mutex); >> } >> >> @@ -2308,12 +2325,6 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst, >> } >> >> /* >> - * XXX: i915_gem_request_completed should be here but currently needs the >> - * definition of i915_seqno_passed() which is below. It will be moved in >> - * a later patch when the call to i915_seqno_passed() is obsoleted... >> - */ >> - >> -/* >> * A command that requires special handling by the command parser. >> */ >> struct drm_i915_cmd_descriptor { >> @@ -2916,18 +2927,6 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2) >> return (int32_t)(seq1 - seq2) >= 0; >> } >> >> -static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req, >> - bool lazy_coherency) >> -{ >> - u32 seqno; >> - >> - BUG_ON(req == NULL); >> - >> - seqno = req->ring->get_seqno(req->ring, lazy_coherency); >> - >> - return i915_seqno_passed(seqno, req->seqno); >> -} >> - >> int __must_check i915_gem_get_seqno(struct drm_device *dev, u32 *seqno); >> int __must_check i915_gem_set_seqno(struct drm_device *dev, u32 seqno); >> >> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c >> index e4056a3..a1b4dbd 100644 >> --- a/drivers/gpu/drm/i915/i915_gem.c >> +++ b/drivers/gpu/drm/i915/i915_gem.c >> @@ -2617,12 +2617,14 @@ static void i915_set_reset_status(struct drm_i915_private *dev_priv, >> } >> } >> >> -void i915_gem_request_free(struct kref *req_ref) >> +static void i915_gem_request_free(struct fence *req_fence) >> { >> - struct drm_i915_gem_request *req = container_of(req_ref, >> - typeof(*req), ref); >> + struct drm_i915_gem_request *req = container_of(req_fence, >> + typeof(*req), fence); >> struct intel_context *ctx = req->ctx; >> >> + WARN_ON(!mutex_is_locked(&req->ring->dev->struct_mutex)); >> + >> if (req->file_priv) >> i915_gem_request_remove_from_client(req); >> >> @@ -2638,6 +2640,45 @@ void i915_gem_request_free(struct kref *req_ref) >> kmem_cache_free(req->i915->requests, req); >> } >> >> +static bool i915_gem_request_enable_signaling(struct fence *req_fence) >> +{ >> + /* Interrupt driven fences are not implemented yet.*/ >> + WARN(true, "This should not be called!"); >> + return true; >> +} >> + >> +static bool i915_gem_request_is_completed(struct fence *req_fence) >> +{ >> + struct drm_i915_gem_request *req = container_of(req_fence, >> + typeof(*req), fence); >> + u32 seqno; >> + >> + seqno = req->ring->get_seqno(req->ring, false/*lazy_coherency*/); >> + >> + return i915_seqno_passed(seqno, req->seqno); >> +} >> + >> +static const char *i915_gem_request_get_driver_name(struct fence *req_fence) >> +{ >> + return "i915"; >> +} >> + >> +static const char *i915_gem_request_get_timeline_name(struct fence *req_fence) >> +{ >> + struct drm_i915_gem_request *req = container_of(req_fence, >> + typeof(*req), fence); >> + return req->ring->name; >> +} >> + >> +static const struct fence_ops i915_gem_request_fops = { >> + .enable_signaling = i915_gem_request_enable_signaling, >> + .signaled = i915_gem_request_is_completed, >> + .wait = fence_default_wait, >> + .release = i915_gem_request_free, >> + .get_driver_name = i915_gem_request_get_driver_name, >> + .get_timeline_name = i915_gem_request_get_timeline_name, >> +}; >> + >> int i915_gem_request_alloc(struct intel_engine_cs *ring, >> struct intel_context *ctx, >> struct drm_i915_gem_request **req_out) >> @@ -2659,7 +2700,6 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring, >> if (ret) >> goto err; >> >> - kref_init(&req->ref); >> req->i915 = dev_priv; >> req->ring = ring; >> req->ctx = ctx; >> @@ -2674,6 +2714,8 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring, >> goto err; >> } >> >> + fence_init(&req->fence, &i915_gem_request_fops, &ring->fence_lock, ring->fence_context, req->seqno); >> + >> /* >> * Reserve space in the ring buffer for all the commands required to >> * eventually emit this request. This is to guarantee that the >> @@ -4723,7 +4765,7 @@ i915_gem_init_hw(struct drm_device *dev) >> { >> struct drm_i915_private *dev_priv = dev->dev_private; >> struct intel_engine_cs *ring; >> - int ret, i, j; >> + int ret, i, j, fence_base; >> >> if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) >> return -EIO; >> @@ -4793,12 +4835,16 @@ i915_gem_init_hw(struct drm_device *dev) >> if (ret) >> goto out; >> >> + fence_base = fence_context_alloc(I915_NUM_RINGS); >> + >> /* Now it is safe to go back round and do everything else: */ >> for_each_ring(ring, dev_priv, i) { >> struct drm_i915_gem_request *req; >> >> WARN_ON(!ring->default_context); >> >> + ring->fence_context = fence_base + i; >> + >> ret = i915_gem_request_alloc(ring, ring->default_context, &req); >> if (ret) { >> i915_gem_cleanup_ringbuffer(dev); >> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c >> index 06180dc..b8c8f9b 100644 >> --- a/drivers/gpu/drm/i915/intel_lrc.c >> +++ b/drivers/gpu/drm/i915/intel_lrc.c >> @@ -1920,6 +1920,7 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin >> ring->dev = dev; >> INIT_LIST_HEAD(&ring->active_list); >> INIT_LIST_HEAD(&ring->request_list); >> + spin_lock_init(&ring->fence_lock); >> i915_gem_batch_pool_init(dev, &ring->batch_pool); >> init_waitqueue_head(&ring->irq_queue); >> >> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c >> index c9b081f..f4a6403 100644 >> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c >> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c >> @@ -2158,6 +2158,7 @@ static int intel_init_ring_buffer(struct drm_device *dev, >> INIT_LIST_HEAD(&ring->request_list); >> INIT_LIST_HEAD(&ring->execlist_queue); >> INIT_LIST_HEAD(&ring->buffers); >> + spin_lock_init(&ring->fence_lock); >> i915_gem_batch_pool_init(dev, &ring->batch_pool); >> memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno)); >> >> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h >> index 58b1976..4547645 100644 >> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h >> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h >> @@ -348,6 +348,9 @@ struct intel_engine_cs { >> * to encode the command length in the header). >> */ >> u32 (*get_cmd_length_mask)(u32 cmd_header); >> + >> + unsigned fence_context; >> + spinlock_t fence_lock; >> }; >> >> bool intel_ring_initialized(struct intel_engine_cs *ring); >> > > Chris has an equivalent patch that does a little more (interrupt driven waits, custom i915 wait function, etc). Can you review that instead assuming it's sufficient? > > http://cgit.freedesktop.org/~ickle/linux-2.6/commit/?h=breadcrumbs&id=f062e706740d87befb8e7cd7ea337f98f0b24f52 Ok given that Chris's stuff is more ambitious, and this has already been out there a long time and seen a lot of review, I think we should go ahead with this version first, and then rebase Chris's stuff on top. So this one has my ack. Jesse _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx