Re: [RFC 5/9] dma-fence: Track explicit waiters

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 18/02/2023 19:54, Rob Clark wrote:
On Thu, Feb 16, 2023 at 3:00 AM Tvrtko Ursulin
<tvrtko.ursulin@xxxxxxxxxxxxxxx> wrote:

From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>

Track how many callers are explicity waiting on a fence to signal and
allow querying that via new dma_fence_wait_count() API.

This provides infrastructure on top of which generic "waitboost" concepts
can be implemented by individual drivers. Wait-boosting is any reactive
activity, such as raising the GPU clocks, which happens while there are
active external waiters.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
---
  drivers/dma-buf/dma-fence.c               | 98 +++++++++++++++++------
  drivers/gpu/drm/i915/gt/intel_engine_pm.c |  1 -
  include/linux/dma-fence.h                 | 15 ++++
  3 files changed, 87 insertions(+), 27 deletions(-)

diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index ea4a1f82c9bf..bdba5a8e21b1 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -344,6 +344,25 @@ void __dma_fence_might_wait(void)
  }
  #endif

+static void incr_wait_count(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+       lockdep_assert_held(fence->lock);
+
+       __set_bit(DMA_FENCE_CB_FLAG_WAITCOUNT_BIT, &cb->flags);
+       fence->waitcount++;
+       WARN_ON_ONCE(!fence->waitcount);
+}
+
+static void decr_wait_count(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+       lockdep_assert_held(fence->lock);
+
+       if (__test_and_clear_bit(DMA_FENCE_CB_FLAG_WAITCOUNT_BIT, &cb->flags)) {
+               WARN_ON_ONCE(!fence->waitcount);
+               fence->waitcount--;
+       }
+}
+
  void __dma_fence_signal__timestamp(struct dma_fence *fence, ktime_t timestamp)
  {
         lockdep_assert_held(fence->lock);
@@ -363,6 +382,7 @@ __dma_fence_signal__notify(struct dma_fence *fence,
         lockdep_assert_held(fence->lock);

         list_for_each_entry_safe(cur, tmp, list, node) {
+               decr_wait_count(fence, cur);
                 INIT_LIST_HEAD(&cur->node);
                 cur->func(fence, cur);
         }
@@ -629,11 +649,44 @@ void dma_fence_enable_sw_signaling(struct dma_fence *fence)
         unsigned long flags;

         spin_lock_irqsave(fence->lock, flags);
+       fence->waitcount++;
+       WARN_ON_ONCE(!fence->waitcount);
         __dma_fence_enable_signaling(fence);
         spin_unlock_irqrestore(fence->lock, flags);
  }
  EXPORT_SYMBOL(dma_fence_enable_sw_signaling);

+static int add_callback(struct dma_fence *fence, struct dma_fence_cb *cb,
+                       dma_fence_func_t func, bool wait)
+{
+       unsigned long flags;
+       int ret = 0;
+
+       __dma_fence_cb_init(cb, func);
+
+       if (WARN_ON(!fence || !func))
+               return -EINVAL;
+
+       if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+               return -ENOENT;
+
+       spin_lock_irqsave(fence->lock, flags);
+
+       if (wait)
+               incr_wait_count(fence, cb);
+
+       if (__dma_fence_enable_signaling(fence)) {
+               list_add_tail(&cb->node, &fence->cb_list);
+       } else {
+               decr_wait_count(fence, cb);
+               ret = -ENOENT;
+       }
+
+       spin_unlock_irqrestore(fence->lock, flags);
+
+       return ret;
+}
+
  /**
   * dma_fence_add_callback - add a callback to be called when the fence
   * is signaled
@@ -659,31 +712,18 @@ EXPORT_SYMBOL(dma_fence_enable_sw_signaling);
  int dma_fence_add_callback(struct dma_fence *fence, struct dma_fence_cb *cb,
                            dma_fence_func_t func)
  {
-       unsigned long flags;
-       int ret = 0;
-
-       __dma_fence_cb_init(cb, func);
-
-       if (WARN_ON(!fence || !func))
-               return -EINVAL;
-
-       if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
-               return -ENOENT;
-
-       spin_lock_irqsave(fence->lock, flags);
-
-       if (__dma_fence_enable_signaling(fence)) {
-               list_add_tail(&cb->node, &fence->cb_list);
-       } else {
-               ret = -ENOENT;
-       }
-
-       spin_unlock_irqrestore(fence->lock, flags);
-
-       return ret;
+       return add_callback(fence, cb, func, false);
  }
  EXPORT_SYMBOL(dma_fence_add_callback);

+int dma_fence_add_wait_callback(struct dma_fence *fence,
+                               struct dma_fence_cb *cb,
+                               dma_fence_func_t func)
+{
+       return add_callback(fence, cb, func, true);
+}
+EXPORT_SYMBOL(dma_fence_add_wait_callback);
+
  /**
   * dma_fence_get_status - returns the status upon completion
   * @fence: the dma_fence to query
@@ -736,8 +776,10 @@ dma_fence_remove_callback(struct dma_fence *fence, struct dma_fence_cb *cb)
         spin_lock_irqsave(fence->lock, flags);

         ret = !list_empty(&cb->node);
-       if (ret)
+       if (ret) {
+               decr_wait_count(fence, cb);
                 list_del_init(&cb->node);
+       }

         spin_unlock_irqrestore(fence->lock, flags);

@@ -795,6 +837,7 @@ dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout)

         __dma_fence_cb_init(&cb.base, dma_fence_default_wait_cb);
         cb.task = current;
+       incr_wait_count(fence, &cb.base);
         list_add(&cb.base.node, &fence->cb_list);

         while (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) && ret > 0) {
@@ -811,8 +854,10 @@ dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout)
                         ret = -ERESTARTSYS;
         }

-       if (!list_empty(&cb.base.node))
+       if (!list_empty(&cb.base.node)) {
+               decr_wait_count(fence, &cb.base);
                 list_del(&cb.base.node);
+       }
         __set_current_state(TASK_RUNNING);

  out:
@@ -890,8 +935,8 @@ dma_fence_wait_any_timeout(struct dma_fence **fences, uint32_t count,
                 struct dma_fence *fence = fences[i];

                 cb[i].task = current;
-               if (dma_fence_add_callback(fence, &cb[i].base,
-                                          dma_fence_default_wait_cb)) {
+               if (dma_fence_add_wait_callback(fence, &cb[i].base,
+                                               dma_fence_default_wait_cb)) {
                         /* This fence is already signaled */
                         if (idx)
                                 *idx = i;
@@ -972,6 +1017,7 @@ dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops,
         fence->context = context;
         fence->seqno = seqno;
         fence->flags = 0UL;
+       fence->waitcount = 0;
         fence->error = 0;

         trace_dma_fence_init(fence);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index e971b153fda9..2693a0151a6b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -218,7 +218,6 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
                  * until the background request retirement running every
                  * second or two).
                  */
-               BUILD_BUG_ON(sizeof(rq->duration) > sizeof(rq->submitq));
                 dma_fence_add_callback(&rq->fence, &rq->duration.cb, duration);
                 rq->duration.emitted = ktime_get();
         }
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index 35933e0ae62c..2b696f9de276 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -92,6 +92,7 @@ struct dma_fence {
         u64 seqno;
         unsigned long flags;
         struct kref refcount;
+       unsigned int waitcount;
         int error;
  };

@@ -116,6 +117,11 @@ typedef void (*dma_fence_func_t)(struct dma_fence *fence,
  struct dma_fence_cb {
         struct list_head node;
         dma_fence_func_t func;
+       unsigned long flags;
+};
+
+enum dma_fence_cb_flag_bits {
+       DMA_FENCE_CB_FLAG_WAITCOUNT_BIT,
  };

  /**
@@ -381,6 +387,9 @@ signed long dma_fence_default_wait(struct dma_fence *fence,
  int dma_fence_add_callback(struct dma_fence *fence,
                            struct dma_fence_cb *cb,
                            dma_fence_func_t func);
+int dma_fence_add_wait_callback(struct dma_fence *fence,
+                               struct dma_fence_cb *cb,
+                               dma_fence_func_t func);
  bool dma_fence_remove_callback(struct dma_fence *fence,
                                struct dma_fence_cb *cb);
  void dma_fence_enable_sw_signaling(struct dma_fence *fence);
@@ -532,6 +541,11 @@ static inline int dma_fence_get_status_locked(struct dma_fence *fence)

  int dma_fence_get_status(struct dma_fence *fence);

+static inline unsigned int dma_fence_wait_count(struct dma_fence *fence)
+{
+       return fence->waitcount;
+}

One thing I noticed while reviving my fence-deadline series is that
this approach is not propagating through array and chain fences

True, thanks.

So I'd need a way to "inherit" the waitcount (and so complete the semi-OO) in both enable_signaling callbacks. Not sure if it would be a problem for the dma_fence_chain_enable_signaling since it currently appears to be only enabling signalling on the last of the chain. Maybe walking all and enabling sw signalling on all but last (which has the callback) might work.

But anyway, I'll hold off re-spinning for this until the fate of the series is clear.

Regards,

Tvrtko


BR,
-R

+
  /**
   * dma_fence_set_error - flag an error condition on the fence
   * @fence: the dma_fence
@@ -634,6 +648,7 @@ static inline void __dma_fence_cb_init(struct dma_fence_cb *cb,
  {
         INIT_LIST_HEAD(&cb->node);
         cb->func = func;
+       cb->flags = 0;
  }

  #endif /* __LINUX_DMA_FENCE_H */
--
2.34.1




[Index of Archives]     [AMD Graphics]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux