On Thu, Feb 16, 2023 at 3:00 AM Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxxxxxxx> wrote: > > From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> > > Track how many callers are explicitly waiting on a fence to signal and > allow querying that via new dma_fence_wait_count() API. > > This provides infrastructure on top of which generic "waitboost" concepts > can be implemented by individual drivers. Wait-boosting is any reactive > activity, such as raising the GPU clocks, which happens while there are > active external waiters. > > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> > --- > drivers/dma-buf/dma-fence.c | 98 +++++++++++++++++------ > drivers/gpu/drm/i915/gt/intel_engine_pm.c | 1 - > include/linux/dma-fence.h | 15 ++++ > 3 files changed, 87 insertions(+), 27 deletions(-) > > diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c > index ea4a1f82c9bf..bdba5a8e21b1 100644 > --- a/drivers/dma-buf/dma-fence.c > +++ b/drivers/dma-buf/dma-fence.c > @@ -344,6 +344,25 @@ void __dma_fence_might_wait(void) > } > #endif > > +static void incr_wait_count(struct dma_fence *fence, struct dma_fence_cb *cb) > +{ > + lockdep_assert_held(fence->lock); > + > + __set_bit(DMA_FENCE_CB_FLAG_WAITCOUNT_BIT, &cb->flags); > + fence->waitcount++; > + WARN_ON_ONCE(!fence->waitcount); > +} > + > +static void decr_wait_count(struct dma_fence *fence, struct dma_fence_cb *cb) > +{ > + lockdep_assert_held(fence->lock); > + > + if (__test_and_clear_bit(DMA_FENCE_CB_FLAG_WAITCOUNT_BIT, &cb->flags)) { > + WARN_ON_ONCE(!fence->waitcount); > + fence->waitcount--; > + } > +} > + > void __dma_fence_signal__timestamp(struct dma_fence *fence, ktime_t timestamp) > { > lockdep_assert_held(fence->lock); > @@ -363,6 +382,7 @@ __dma_fence_signal__notify(struct dma_fence *fence, > lockdep_assert_held(fence->lock); > > list_for_each_entry_safe(cur, tmp, list, node) { > + decr_wait_count(fence, cur); > INIT_LIST_HEAD(&cur->node); > cur->func(fence, cur); > } > @@ -629,11 +649,44 @@ void 
dma_fence_enable_sw_signaling(struct dma_fence *fence) > unsigned long flags; > > spin_lock_irqsave(fence->lock, flags); > + fence->waitcount++; > + WARN_ON_ONCE(!fence->waitcount); > __dma_fence_enable_signaling(fence); > spin_unlock_irqrestore(fence->lock, flags); > } > EXPORT_SYMBOL(dma_fence_enable_sw_signaling); > > +static int add_callback(struct dma_fence *fence, struct dma_fence_cb *cb, > + dma_fence_func_t func, bool wait) > +{ > + unsigned long flags; > + int ret = 0; > + > + __dma_fence_cb_init(cb, func); > + > + if (WARN_ON(!fence || !func)) > + return -EINVAL; > + > + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) > + return -ENOENT; > + > + spin_lock_irqsave(fence->lock, flags); > + > + if (wait) > + incr_wait_count(fence, cb); > + > + if (__dma_fence_enable_signaling(fence)) { > + list_add_tail(&cb->node, &fence->cb_list); > + } else { > + decr_wait_count(fence, cb); > + ret = -ENOENT; > + } > + > + spin_unlock_irqrestore(fence->lock, flags); > + > + return ret; > +} > + > /** > * dma_fence_add_callback - add a callback to be called when the fence > * is signaled > @@ -659,31 +712,18 @@ EXPORT_SYMBOL(dma_fence_enable_sw_signaling); > int dma_fence_add_callback(struct dma_fence *fence, struct dma_fence_cb *cb, > dma_fence_func_t func) > { > - unsigned long flags; > - int ret = 0; > - > - __dma_fence_cb_init(cb, func); > - > - if (WARN_ON(!fence || !func)) > - return -EINVAL; > - > - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) > - return -ENOENT; > - > - spin_lock_irqsave(fence->lock, flags); > - > - if (__dma_fence_enable_signaling(fence)) { > - list_add_tail(&cb->node, &fence->cb_list); > - } else { > - ret = -ENOENT; > - } > - > - spin_unlock_irqrestore(fence->lock, flags); > - > - return ret; > + return add_callback(fence, cb, func, false); > } > EXPORT_SYMBOL(dma_fence_add_callback); > > +int dma_fence_add_wait_callback(struct dma_fence *fence, > + struct dma_fence_cb *cb, > + dma_fence_func_t func) > +{ > + return 
add_callback(fence, cb, func, true); > +} > +EXPORT_SYMBOL(dma_fence_add_wait_callback); > + > /** > * dma_fence_get_status - returns the status upon completion > * @fence: the dma_fence to query > @@ -736,8 +776,10 @@ dma_fence_remove_callback(struct dma_fence *fence, struct dma_fence_cb *cb) > spin_lock_irqsave(fence->lock, flags); > > ret = !list_empty(&cb->node); > - if (ret) > + if (ret) { > + decr_wait_count(fence, cb); > list_del_init(&cb->node); > + } > > spin_unlock_irqrestore(fence->lock, flags); > > @@ -795,6 +837,7 @@ dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout) > > __dma_fence_cb_init(&cb.base, dma_fence_default_wait_cb); > cb.task = current; > + incr_wait_count(fence, &cb.base); > list_add(&cb.base.node, &fence->cb_list); > > while (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) && ret > 0) { > @@ -811,8 +854,10 @@ dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout) > ret = -ERESTARTSYS; > } > > - if (!list_empty(&cb.base.node)) > + if (!list_empty(&cb.base.node)) { > + decr_wait_count(fence, &cb.base); > list_del(&cb.base.node); > + } > __set_current_state(TASK_RUNNING); > > out: > @@ -890,8 +935,8 @@ dma_fence_wait_any_timeout(struct dma_fence **fences, uint32_t count, > struct dma_fence *fence = fences[i]; > > cb[i].task = current; > - if (dma_fence_add_callback(fence, &cb[i].base, > - dma_fence_default_wait_cb)) { > + if (dma_fence_add_wait_callback(fence, &cb[i].base, > + dma_fence_default_wait_cb)) { > /* This fence is already signaled */ > if (idx) > *idx = i; > @@ -972,6 +1017,7 @@ dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops, > fence->context = context; > fence->seqno = seqno; > fence->flags = 0UL; > + fence->waitcount = 0; > fence->error = 0; > > trace_dma_fence_init(fence); > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c > index e971b153fda9..2693a0151a6b 100644 > --- 
a/drivers/gpu/drm/i915/gt/intel_engine_pm.c > +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c > @@ -218,7 +218,6 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) > * until the background request retirement running every > * second or two). > */ > - BUILD_BUG_ON(sizeof(rq->duration) > sizeof(rq->submitq)); > dma_fence_add_callback(&rq->fence, &rq->duration.cb, duration); > rq->duration.emitted = ktime_get(); > } > diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h > index 35933e0ae62c..2b696f9de276 100644 > --- a/include/linux/dma-fence.h > +++ b/include/linux/dma-fence.h > @@ -92,6 +92,7 @@ struct dma_fence { > u64 seqno; > unsigned long flags; > struct kref refcount; > + unsigned int waitcount; > int error; > }; > > @@ -116,6 +117,11 @@ typedef void (*dma_fence_func_t)(struct dma_fence *fence, > struct dma_fence_cb { > struct list_head node; > dma_fence_func_t func; > + unsigned long flags; > +}; > + > +enum dma_fence_cb_flag_bits { > + DMA_FENCE_CB_FLAG_WAITCOUNT_BIT, > }; > > /** > @@ -381,6 +387,9 @@ signed long dma_fence_default_wait(struct dma_fence *fence, > int dma_fence_add_callback(struct dma_fence *fence, > struct dma_fence_cb *cb, > dma_fence_func_t func); > +int dma_fence_add_wait_callback(struct dma_fence *fence, > + struct dma_fence_cb *cb, > + dma_fence_func_t func); > bool dma_fence_remove_callback(struct dma_fence *fence, > struct dma_fence_cb *cb); > void dma_fence_enable_sw_signaling(struct dma_fence *fence); > @@ -532,6 +541,11 @@ static inline int dma_fence_get_status_locked(struct dma_fence *fence) > > int dma_fence_get_status(struct dma_fence *fence); > > +static inline unsigned int dma_fence_wait_count(struct dma_fence *fence) > +{ > + return fence->waitcount; > +} One thing I noticed while reviving my fence-deadline series is that this approach is not propagating through array and chain fences BR, -R > + > /** > * dma_fence_set_error - flag an error condition on the fence > * @fence: the dma_fence > 
@@ -634,6 +648,7 @@ static inline void __dma_fence_cb_init(struct dma_fence_cb *cb, > { > INIT_LIST_HEAD(&cb->node); > cb->func = func; > + cb->flags = 0; > } > > #endif /* __LINUX_DMA_FENCE_H */ > -- > 2.34.1 >