With the infrastructure converted over to tracking multiple timelines in the GEM API whilst preserving the efficiency of using a single execution timeline internally, we can now assign a separate timeline to every context with full-ppgtt. Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_drv.h | 10 ++++ drivers/gpu/drm/i915/i915_gem.c | 5 ++ drivers/gpu/drm/i915/i915_gem_context.c | 4 +- drivers/gpu/drm/i915/i915_gem_evict.c | 11 +++-- drivers/gpu/drm/i915/i915_gem_gtt.c | 19 ++++--- drivers/gpu/drm/i915/i915_gem_gtt.h | 4 +- drivers/gpu/drm/i915/i915_gem_request.c | 85 +++++++++++++++++++------------- drivers/gpu/drm/i915/i915_gem_timeline.c | 1 + drivers/gpu/drm/i915/i915_gem_timeline.h | 2 + drivers/gpu/drm/i915/intel_breadcrumbs.c | 12 +++-- drivers/gpu/drm/i915/intel_ringbuffer.h | 5 -- 11 files changed, 101 insertions(+), 57 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 48c63365184d..1187e8f51985 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3493,6 +3493,16 @@ static inline void i915_gem_context_put(struct i915_gem_context *ctx) kref_put(&ctx->ref, i915_gem_context_free); } +static inline struct intel_timeline * +i915_gem_context_lookup_timeline(struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + struct i915_address_space *vm; + + vm = ctx->ppgtt ? &ctx->ppgtt->base : &ctx->i915->ggtt.base; + return &vm->timeline.engine[engine->id]; +} + static inline bool i915_gem_context_is_default(const struct i915_gem_context *c) { return c->user_handle == DEFAULT_CONTEXT_HANDLE; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index cd436f28e702..fd62e947322a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2558,6 +2558,7 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine) { struct drm_i915_gem_request *request; struct i915_gem_context *incomplete_ctx; + struct intel_timeline *timeline; bool ring_hung; /* Ensure irq handler finishes, and not run again. */ @@ -2595,6 +2596,10 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine) list_for_each_entry_continue(request, &engine->timeline->requests, link) if (request->ctx == incomplete_ctx) reset_request(request); + + timeline = i915_gem_context_lookup_timeline(incomplete_ctx, engine); + list_for_each_entry(request, &timeline->requests, link) + reset_request(request); } void i915_gem_reset(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 762f3de9421f..5574bb1ca5c4 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -365,9 +365,9 @@ i915_gem_create_context(struct drm_device *dev, return ctx; if (USES_FULL_PPGTT(dev)) { - struct i915_hw_ppgtt *ppgtt = - i915_ppgtt_create(to_i915(dev), file_priv); + struct i915_hw_ppgtt *ppgtt; + ppgtt = i915_ppgtt_create(to_i915(dev), file_priv, ctx->name); if (IS_ERR(ppgtt)) { DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n", PTR_ERR(ppgtt)); diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 61f716c8768c..dcab3da29b04 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -33,13 +33,16 @@ #include "intel_drv.h" #include "i915_trace.h" -static bool -gpu_is_idle(struct drm_i915_private *dev_priv) +static bool ggtt_is_idle(struct drm_i915_private *dev_priv) { + struct i915_ggtt *ggtt = &dev_priv->ggtt; struct intel_engine_cs *engine; for_each_engine(engine, dev_priv) { - if (intel_engine_is_active(engine)) + struct intel_timeline *tl; + + tl = &ggtt->base.timeline.engine[engine->id]; + if (i915_gem_active_isset(&tl->last_request)) return false; } @@ -153,7 +156,7 @@ search_again: if (!i915_is_ggtt(vm) || flags & PIN_NONBLOCK) return -ENOSPC; - if (gpu_is_idle(dev_priv)) { + if (ggtt_is_idle(dev_priv)) { /* If we still have pending pageflip completions, drop * back to userspace to give our workqueues time to * acquire our locks and unpin the old scanouts. diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 2a8eed5a70b9..0b7541b18195 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2112,8 +2112,10 @@ static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt, } static void i915_address_space_init(struct i915_address_space *vm, - struct drm_i915_private *dev_priv) + struct drm_i915_private *dev_priv, + const char *name) { + i915_gem_timeline_init(dev_priv, &vm->timeline, name); drm_mm_init(&vm->mm, vm->start, vm->total); INIT_LIST_HEAD(&vm->active_list); INIT_LIST_HEAD(&vm->inactive_list); @@ -2142,14 +2144,15 @@ static void gtt_write_workarounds(struct drm_device *dev) static int i915_ppgtt_init(struct i915_hw_ppgtt *ppgtt, struct drm_i915_private *dev_priv, - struct drm_i915_file_private *file_priv) + struct drm_i915_file_private *file_priv, + const char *name) { int ret; ret = __hw_ppgtt_init(ppgtt, dev_priv); if (ret == 0) { kref_init(&ppgtt->ref); - i915_address_space_init(&ppgtt->base, dev_priv); + i915_address_space_init(&ppgtt->base, dev_priv, name); ppgtt->base.file = file_priv; } @@ -2183,7 +2186,8 @@ int i915_ppgtt_init_hw(struct drm_device *dev) struct i915_hw_ppgtt * i915_ppgtt_create(struct drm_i915_private *dev_priv, - struct drm_i915_file_private *fpriv) + struct drm_i915_file_private *fpriv, + const char *name) { struct i915_hw_ppgtt *ppgtt; int ret; @@ -2192,7 +2196,7 @@ i915_ppgtt_create(struct drm_i915_private *dev_priv, if (!ppgtt) return ERR_PTR(-ENOMEM); - ret = i915_ppgtt_init(ppgtt, dev_priv, fpriv); + ret = i915_ppgtt_init(ppgtt, dev_priv, fpriv, name); if (ret) { kfree(ppgtt); return ERR_PTR(ret); @@ -2215,6 +2219,7 @@ void i915_ppgtt_release(struct kref *kref) WARN_ON(!list_empty(&ppgtt->base.inactive_list)); WARN_ON(!list_empty(&ppgtt->base.unbound_list)); + i915_gem_timeline_fini(&ppgtt->base.timeline); list_del(&ppgtt->base.global_link); drm_mm_takedown(&ppgtt->base.mm); @@ -3209,11 +3214,13 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) /* Subtract the guard page before address space initialization to * shrink the range used by drm_mm. */ + mutex_lock(&dev_priv->drm.struct_mutex); ggtt->base.total -= PAGE_SIZE; - i915_address_space_init(&ggtt->base, dev_priv); + i915_address_space_init(&ggtt->base, dev_priv, "[global]"); ggtt->base.total += PAGE_SIZE; if (!HAS_LLC(dev_priv)) ggtt->base.mm.color_adjust = i915_gtt_color_adjust; + mutex_unlock(&dev_priv->drm.struct_mutex); if (!io_mapping_init_wc(&dev_priv->ggtt.mappable, dev_priv->ggtt.mappable_base, diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 205932c90d09..95ab7fca348c 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -342,6 +342,7 @@ struct i915_pml4 { struct i915_address_space { struct drm_mm mm; + struct i915_gem_timeline timeline; struct drm_device *dev; /* Every address space belongs to a struct file - except for the global * GTT that is owned by the driver (and so @file is set to NULL). In @@ -614,7 +615,8 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv); int i915_ppgtt_init_hw(struct drm_device *dev); void i915_ppgtt_release(struct kref *kref); struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_i915_private *dev_priv, - struct drm_i915_file_private *fpriv); + struct drm_i915_file_private *fpriv, + const char *name); static inline void i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt) { if (ppgtt) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 7234540522bd..a629f7d430be 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -34,12 +34,6 @@ static const char *i915_fence_get_driver_name(struct fence *fence) static const char *i915_fence_get_timeline_name(struct fence *fence) { - /* Timelines are bound by eviction to a VM. However, since - * we only have a global seqno at the moment, we only have - * a single timeline. Note that each timeline will have - * multiple execution contexts (fence contexts) as we allow - * engines within a single timeline to execute in parallel. - */ return to_request(fence)->timeline->common->name; } @@ -64,18 +58,6 @@ static signed long i915_fence_wait(struct fence *fence, return i915_wait_request(to_request(fence), interruptible, timeout); } -static void i915_fence_value_str(struct fence *fence, char *str, int size) -{ - snprintf(str, size, "%u", fence->seqno); -} - -static void i915_fence_timeline_value_str(struct fence *fence, char *str, - int size) -{ - snprintf(str, size, "%u", - intel_engine_get_seqno(to_request(fence)->engine)); -} - static void i915_fence_release(struct fence *fence) { struct drm_i915_gem_request *req = to_request(fence); @@ -90,8 +72,6 @@ const struct fence_ops i915_fence_ops = { .signaled = i915_fence_signaled, .wait = i915_fence_wait, .release = i915_fence_release, - .fence_value_str = i915_fence_value_str, - .timeline_value_str = i915_fence_timeline_value_str, }; int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, @@ -144,7 +124,10 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) struct i915_gem_active *active, *next; trace_i915_gem_request_retire(request); + + spin_lock_irq(&request->engine->timeline->lock); list_del_init(&request->link); + spin_unlock_irq(&request->engine->timeline->lock); /* We know the GPU must have read the request to have * sent us the seqno + interrupt, so use the position @@ -310,6 +293,12 @@ static int reserve_global_seqno(struct drm_i915_private *i915) return 0; } +static u32 __timeline_get_seqno(struct i915_gem_timeline *tl) +{ + /* next_seqno only incremented under a mutex */ + return tl->next_seqno.counter++; +} + static u32 timeline_get_seqno(struct i915_gem_timeline *tl) { return atomic_inc_return(&tl->next_seqno); @@ -320,17 +309,42 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) { struct drm_i915_gem_request *request = container_of(fence, typeof(*request), submit); + struct intel_timeline *timeline; + struct intel_engine_cs *engine = request->engine; + unsigned long flags; + u32 seqno; /* Will be called from irq-context when using foreign DMA fences */ - switch (state) { - case FENCE_COMPLETE: - request->engine->submit_request(request); - break; + if (state != FENCE_COMPLETE) + return NOTIFY_DONE; - case FENCE_FREE: - break; - } + timeline = engine->timeline; + GEM_BUG_ON(timeline == request->timeline); + + spin_lock_irqsave(&timeline->lock, flags); + + seqno = timeline_get_seqno(timeline->common); + GEM_BUG_ON(seqno == 0); + + request->previous_seqno = timeline->last_submitted_seqno; + timeline->last_submitted_seqno = seqno; + + spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); + request->global_seqno = seqno; + if (test_bit(FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) + intel_engine_enable_signaling(request); + spin_unlock(&request->lock); + + engine->emit_request(request, request->ring->vaddr + request->postfix); + + spin_lock_nested(&request->timeline->lock, SINGLE_DEPTH_NESTING); + list_move_tail(&request->link, &timeline->requests); + spin_unlock(&request->timeline->lock); + + engine->submit_request(request); + + spin_unlock_irqrestore(&timeline->lock, flags); return NOTIFY_DONE; } @@ -409,24 +423,24 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, goto err_unreserve; } - req->timeline = engine->timeline; + req->timeline = i915_gem_context_lookup_timeline(ctx, engine); spin_lock_init(&req->lock); fence_init(&req->fence, &i915_fence_ops, &req->lock, req->timeline->fence_context, - timeline_get_seqno(req->timeline->common)); + __timeline_get_seqno(req->timeline->common)); i915_sw_fence_init(&req->submit, submit_notify); INIT_LIST_HEAD(&req->active_list); req->i915 = dev_priv; req->engine = engine; - req->global_seqno = req->fence.seqno; req->ctx = i915_gem_context_get(ctx); /* No zalloc, must clear what we need by hand */ + req->global_seqno = 0; req->previous_context = NULL; req->file_priv = NULL; req->batch = NULL; @@ -677,8 +691,6 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) err = intel_ring_begin(request, engine->emit_request_sz); GEM_BUG_ON(err); request->postfix = ring->tail; - - engine->emit_request(request, request->ring->vaddr + request->postfix); ring->tail += engine->emit_request_sz * sizeof(u32); /* Seal the request and mark it as pending execution. Note that @@ -693,12 +705,15 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) i915_sw_fence_await_sw_fence(&request->submit, &prev->submit, &request->submitq, GFP_NOWAIT); - request->emitted_jiffies = jiffies; - request->previous_seqno = timeline->last_submitted_seqno; + spin_lock_irq(&timeline->lock); + list_add_tail(&request->link, &timeline->requests); + spin_unlock_irq(&timeline->lock); + timeline->last_submitted_seqno = request->fence.seqno; i915_gem_active_set(&timeline->last_request, request); - list_add_tail(&request->link, &timeline->requests); + list_add_tail(&request->ring_link, &ring->request_list); + request->emitted_jiffies = jiffies; i915_gem_mark_busy(engine); diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.c b/drivers/gpu/drm/i915/i915_gem_timeline.c index a4579c109066..40d9f009673f 100644 --- a/drivers/gpu/drm/i915/i915_gem_timeline.c +++ b/drivers/gpu/drm/i915/i915_gem_timeline.c @@ -48,6 +48,7 @@ int i915_gem_timeline_init(struct drm_i915_private *i915, tl->fence_context = fences++; tl->common = timeline; + spin_lock_init(&tl->lock); init_request_active(&tl->last_request, NULL); INIT_LIST_HEAD(&tl->requests); } diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h index 8000c09b1ea9..f2bf7b1d49a1 100644 --- a/drivers/gpu/drm/i915/i915_gem_timeline.h +++ b/drivers/gpu/drm/i915/i915_gem_timeline.h @@ -35,6 +35,8 @@ struct intel_timeline { u64 fence_context; u32 last_submitted_seqno; + spinlock_t lock; + /** * List of breadcrumbs associated with GPU requests currently * outstanding. diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 9ad1028681cf..9dba4971fb1e 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -77,22 +77,26 @@ static void intel_breadcrumbs_fake_irq(unsigned long data) static void irq_enable(struct intel_engine_cs *engine) { + unsigned long flags; + /* Enabling the IRQ may miss the generation of the interrupt, but * we still need to force the barrier before reading the seqno, * just in case. */ engine->breadcrumbs.irq_posted = true; - spin_lock_irq(&engine->i915->irq_lock); + spin_lock_irqsave(&engine->i915->irq_lock, flags); engine->irq_enable(engine); - spin_unlock_irq(&engine->i915->irq_lock); + spin_unlock_irqrestore(&engine->i915->irq_lock, flags); } static void irq_disable(struct intel_engine_cs *engine) { - spin_lock_irq(&engine->i915->irq_lock); + unsigned long flags; + + spin_lock_irqsave(&engine->i915->irq_lock, flags); engine->irq_disable(engine); - spin_unlock_irq(&engine->i915->irq_lock); + spin_unlock_irqrestore(&engine->i915->irq_lock, flags); engine->breadcrumbs.irq_posted = false; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 436880149ee0..9d846ecd6993 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -543,9 +543,4 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); unsigned int intel_kick_waiters(struct drm_i915_private *i915); unsigned int intel_kick_signalers(struct drm_i915_private *i915); -static inline bool intel_engine_is_active(struct intel_engine_cs *engine) -{ - return i915_gem_active_isset(&engine->timeline->last_request); -} - #endif /* _INTEL_RINGBUFFER_H_ */ -- 2.9.3 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx