On 03/06/16 17:08, Chris Wilson wrote:
Under the assumption that enabling signaling will be a frequent operation, lets preallocate our attachments for signaling inside the request struct (and so benefiting from the slab cache).
Oh you did this part which I suggested in the previous patch. :)
Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_breadcrumbs.c | 89 ++++++++++++++++++-------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 6 +++ 3 files changed, 56 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b0235372cfdf..88d9242398ce 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2363,6 +2363,7 @@ struct drm_i915_gem_request { struct drm_i915_private *i915; struct intel_engine_cs *engine; unsigned reset_counter; + struct intel_signal_node signaling; /** GEM sequence number associated with the previous request, * when the HWS breadcrumb is equal to this the GPU is processing diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 143891a2b68a..8ab508ed4248 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -128,16 +128,14 @@ static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b, wake_up_process(wait->task); /* implicit smp_wmb() */ } -bool intel_engine_add_wait(struct intel_engine_cs *engine, - struct intel_wait *wait) +static bool __intel_engine_add_wait(struct intel_engine_cs *engine, + struct intel_wait *wait) { struct intel_breadcrumbs *b = &engine->breadcrumbs; struct rb_node **p, *parent, *completed; bool first; u32 seqno; - spin_lock(&b->lock); - /* Insert the request into the retirement ordered list * of waiters by walking the rbtree. If we are the oldest * seqno in the tree (the first to be retired), then @@ -223,6 +221,17 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine, GEM_BUG_ON(!b->first_wait); GEM_BUG_ON(rb_first(&b->waiters) != &b->first_wait->node); + return first; +} + +bool intel_engine_add_wait(struct intel_engine_cs *engine, + struct intel_wait *wait) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + bool first; + + spin_lock(&b->lock); + first = __intel_engine_add_wait(engine, wait); spin_unlock(&b->lock); return first; @@ -323,35 +332,29 @@ out_unlock: spin_unlock(&b->lock); } -struct signal { - struct rb_node node; - struct intel_wait wait; - struct drm_i915_gem_request *request; -}; - -static bool signal_complete(struct signal *signal) +static bool signal_complete(struct drm_i915_gem_request *request) { - if (signal == NULL) + if (request == NULL) return false; /* If another process served as the bottom-half it may have already * signalled that this wait is already completed. */ - if (intel_wait_complete(&signal->wait)) + if (intel_wait_complete(&request->signaling.wait)) return true; /* Carefully check if the request is complete, giving time for the * seqno to be visible or if the GPU hung. */ - if (__i915_request_irq_complete(signal->request)) + if (__i915_request_irq_complete(request)) return true; return false; } -static struct signal *to_signal(struct rb_node *rb) +static struct drm_i915_gem_request *to_signal(struct rb_node *rb)
Why it is call to_signal then?
{ - return container_of(rb, struct signal, node); + return container_of(rb, struct drm_i915_gem_request, signaling.node); } static void signaler_set_rtpriority(void) @@ -364,7 +367,7 @@ static int intel_breadcrumbs_signaler(void *arg) { struct intel_engine_cs *engine = arg; struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct signal *signal; + struct drm_i915_gem_request *request; /* Install ourselves with high priority to reduce signalling latency */ signaler_set_rtpriority(); @@ -380,14 +383,13 @@ static int intel_breadcrumbs_signaler(void *arg) * need to wait for a new interrupt from the GPU or for * a new client. */ - signal = READ_ONCE(b->first_signal); - if (signal_complete(signal)) { + request = READ_ONCE(b->first_signal); + if (signal_complete(request)) { /* Wake up all other completed waiters and select the * next bottom-half for the next user interrupt. */ - intel_engine_remove_wait(engine, &signal->wait); - - i915_gem_request_unreference(signal->request); + intel_engine_remove_wait(engine, + &request->signaling.wait); /* Find the next oldest signal. Note that as we have * not been holding the lock, another client may @@ -396,12 +398,15 @@ static int intel_breadcrumbs_signaler(void *arg) * the oldest before picking the next one. */ spin_lock(&b->lock); - if (signal == b->first_signal) - b->first_signal = rb_next(&signal->node); - rb_erase(&signal->node, &b->signals); + if (request == b->first_signal) { + struct rb_node *rb = + rb_next(&request->signaling.node); + b->first_signal = rb ? to_signal(rb) : NULL;
Made me look in the previous patch on how you didn't need to change the type for first_signal in this one. void* ! :) Please fix there. :)
+ } + rb_erase(&request->signaling.node, &b->signals); spin_unlock(&b->lock); - kfree(signal); + i915_gem_request_unreference(request); } else { if (kthread_should_stop()) break; @@ -418,20 +423,23 @@ int intel_engine_enable_signaling(struct drm_i915_gem_request *request) struct intel_engine_cs *engine = request->engine; struct intel_breadcrumbs *b = &engine->breadcrumbs; struct rb_node *parent, **p; - struct signal *signal; bool first, wakeup; if (unlikely(IS_ERR(b->signaler))) return PTR_ERR(b->signaler); - signal = kmalloc(sizeof(*signal), GFP_ATOMIC); - if (unlikely(!signal)) - return -ENOMEM; + if (unlikely(READ_ONCE(request->signaling.wait.task))) + return 0;
Hmm it will depend on following patches whether this is safe. I don't like the explosion of READ_ONCE and smp_store_mb's in these patches. Something is bound to be broken.
You even check it below under the lock. So I am not sure this optimisation is worth it. Maybe leave it for later?
- signal->wait.task = b->signaler; - signal->wait.seqno = request->seqno; + spin_lock(&b->lock); + if (unlikely(request->signaling.wait.task)) { + wakeup = false; + goto unlock; + } - signal->request = i915_gem_request_reference(request); + request->signaling.wait.task = b->signaler; + request->signaling.wait.seqno = request->seqno; + i915_gem_request_reference(request); /* First add ourselves into the list of waiters, but register our * bottom-half as the signaller thread. As per usual, only the oldest @@ -441,29 +449,30 @@ int intel_engine_enable_signaling(struct drm_i915_gem_request *request) * If we are the oldest waiter, enable the irq (after which we * must double check that the seqno did not complete). */ - wakeup = intel_engine_add_wait(engine, &signal->wait); + wakeup = __intel_engine_add_wait(engine, &request->signaling.wait); /* Now insert ourselves into the retirement ordered list of signals * on this engine. We track the oldest seqno as that will be the * first signal to complete. */ - spin_lock(&b->lock); parent = NULL; first = true; p = &b->signals.rb_node; while (*p) { parent = *p; - if (i915_seqno_passed(signal->wait.seqno, - to_signal(parent)->wait.seqno)) { + if (i915_seqno_passed(request->seqno, + to_signal(parent)->seqno)) { p = &parent->rb_right; first = false; } else p = &parent->rb_left; } - rb_link_node(&signal->node, parent, p); - rb_insert_color(&signal->node, &b->signals); + rb_link_node(&request->signaling.node, parent, p); + rb_insert_color(&request->signaling.node, &b->signals); if (first) - smp_store_mb(b->first_signal, signal); + smp_store_mb(b->first_signal, request); + +unlock: spin_unlock(&b->lock); if (wakeup) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index f4bca38caef0..5f7cb3d0ea1c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -530,6 +530,12 @@ struct intel_wait { struct task_struct *task; u32 seqno; }; + +struct intel_signal_node { + struct rb_node node; + struct intel_wait wait; +}; + void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); static inline void intel_wait_init(struct intel_wait *wait, u32 seqno) {
Otherwise looks OK. Regards, Tvrtko _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx