Re: [PATCH 1/2] drm/i915: Remove i915_request.lock requirement for execution callbacks

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 15/07/2020 13:48, Chris Wilson wrote:
Quoting Tvrtko Ursulin (2020-07-15 13:39:56)

On 14/07/2020 10:47, Chris Wilson wrote:
We are using the i915_request.lock to serialise adding an execution
callback with __i915_request_submit. However, if we use an atomic
llist_add to serialise multiple waiters and then check to see if the
request is already executing, we can remove the irq-spinlock.

Fixes: 1d9221e9d395 ("drm/i915: Skip signaling a signaled request")
Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
---
   drivers/gpu/drm/i915/i915_request.c | 38 +++++++----------------------
   1 file changed, 9 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 0b2fe55e6194..c59315def07d 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -190,13 +190,11 @@ static void __notify_execute_cb(struct i915_request *rq)
   {
       struct execute_cb *cb, *cn;
- lockdep_assert_held(&rq->lock);
-
-     GEM_BUG_ON(!i915_request_is_active(rq));
       if (llist_empty(&rq->execute_cb))
               return;
- llist_for_each_entry_safe(cb, cn, rq->execute_cb.first, work.llnode)
+     llist_for_each_entry_safe(cb, cn,
+                               llist_del_all(&rq->execute_cb), work.llnode)
               irq_work_queue(&cb->work);
/*
@@ -209,7 +207,6 @@ static void __notify_execute_cb(struct i915_request *rq)
        * preempt-to-idle cycle on the target engine, all the while the
        * master execute_cb may refire.
        */
-     init_llist_head(&rq->execute_cb);
   }
static inline void
@@ -276,6 +273,7 @@ static void remove_from_engine(struct i915_request *rq)
       list_del_init(&rq->sched.link);
       clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
       clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags);
+     set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
       spin_unlock_irq(&locked->active.lock);
   }
@@ -323,12 +321,8 @@ bool i915_request_retire(struct i915_request *rq)
               GEM_BUG_ON(!atomic_read(&rq->engine->gt->rps.num_waiters));
               atomic_dec(&rq->engine->gt->rps.num_waiters);
       }
-     if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
-             set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
-             __notify_execute_cb(rq);
-     }
-     GEM_BUG_ON(!llist_empty(&rq->execute_cb));
       spin_unlock_irq(&rq->lock);
+     __notify_execute_cb(rq);
remove_from_client(rq);
       __list_del_entry(&rq->link); /* poison neither prev/next (RCU walks) */
@@ -357,12 +351,6 @@ void i915_request_retire_upto(struct i915_request *rq)
       } while (i915_request_retire(tmp) && tmp != rq);
   }
-static void __llist_add(struct llist_node *node, struct llist_head *head)
-{
-     node->next = head->first;
-     head->first = node;
-}
-
   static struct i915_request * const *
   __engine_active(struct intel_engine_cs *engine)
   {
@@ -439,18 +427,11 @@ __await_execution(struct i915_request *rq,
               cb->work.func = irq_execute_cb_hook;
       }
- spin_lock_irq(&signal->lock);
-     if (i915_request_is_active(signal) || __request_in_flight(signal)) {
-             if (hook) {
-                     hook(rq, &signal->fence);
-                     i915_request_put(signal);
-             }
-             i915_sw_fence_complete(cb->fence);
-             kmem_cache_free(global.slab_execute_cbs, cb);
-     } else {
-             __llist_add(&cb->work.llnode, &signal->execute_cb);
+     if (llist_add(&cb->work.llnode, &signal->execute_cb)) {
+             if (i915_request_is_active(signal) ||
+                 __request_in_flight(signal))
+                     __notify_execute_cb(signal);

Any reason why the hook couldn't be called straight away but needs to
always go through the worker now?

Maybe it would be easier to figure out whether it is race-free that way.

if (llist_add(..)) {
         llist_for_each_entry_safe(.., llist_del_all(..), .)

Then you would tell me off for open coding __notify_execute_cb for the
benefit of not going through the irq_work. Something like

@@ -186,7 +186,7 @@ static void irq_execute_cb_hook(struct irq_work *wrk)
         irq_execute_cb(wrk);
  }

-static void __notify_execute_cb(struct i915_request *rq)
+static void __execute_cb_irq(struct i915_request *rq)
  {
         struct execute_cb *cb, *cn;

@@ -209,6 +209,15 @@ static void __notify_execute_cb(struct i915_request *rq)
          */
  }

+static void __execute_cb_imm(struct i915_request *rq)
+{
+       struct execute_cb *cb, *cn;
+
+       llist_for_each_entry_safe(cb, cn,
+                                 llist_del_all(&rq->execute_cb), work.llnode)
+               cb->work.func(&cb->work);
+}
+
  static inline void
  remove_from_client(struct i915_request *request)
  {
@@ -323,7 +332,7 @@ bool i915_request_retire(struct i915_request *rq)
                 atomic_dec(&rq->engine->gt->rps.num_waiters);
         }
         spin_unlock_irq(&rq->lock);
-       __notify_execute_cb(rq);
+       __execute_cb_imm(rq);

         remove_from_client(rq);
         __list_del_entry(&rq->link); /* poison neither prev/next (RCU walks) */
@@ -431,7 +440,7 @@ __await_execution(struct i915_request *rq,
         if (llist_add(&cb->work.llnode, &signal->execute_cb)) {
                 if (i915_request_is_active(signal) ||
                     __request_in_flight(signal))
-                       __notify_execute_cb(signal);
+                       __execute_cb_imm(signal);
         }

         return 0;
@@ -547,7 +556,7 @@ bool __i915_request_submit(struct i915_request *request)
                 list_move_tail(&request->sched.link, &engine->active.requests);
                 clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags);
         }
-       __notify_execute_cb(request);
+       __execute_cb_irq(request);

         /* We may be recursing from the signal callback of another i915 fence */
         if (!i915_request_signaled(request)) {

I think so. The IRQ callback can call the imm version, and the check for the hook (the if (..func) test) still needs to go somewhere.

Regards,

Tvrtko


_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx



[Index of Archives]     [AMD Graphics]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux