From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> Track how many callers are explicity waiting on a fence to signal and allow querying that via new dma_fence_wait_count() API. This provides infrastructure on top of which generic "waitboost" concepts can be implemented by individual drivers. Wait-boosting is any reactive activity, such as raising the GPU clocks, which happens while there are active external waiters. Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> --- drivers/dma-buf/dma-fence.c | 98 +++++++++++++++++------ drivers/gpu/drm/i915/gt/intel_engine_pm.c | 1 - include/linux/dma-fence.h | 15 ++++ 3 files changed, 87 insertions(+), 27 deletions(-) diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index ea4a1f82c9bf..bdba5a8e21b1 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -344,6 +344,25 @@ void __dma_fence_might_wait(void) } #endif +static void incr_wait_count(struct dma_fence *fence, struct dma_fence_cb *cb) +{ + lockdep_assert_held(fence->lock); + + __set_bit(DMA_FENCE_CB_FLAG_WAITCOUNT_BIT, &cb->flags); + fence->waitcount++; + WARN_ON_ONCE(!fence->waitcount); +} + +static void decr_wait_count(struct dma_fence *fence, struct dma_fence_cb *cb) +{ + lockdep_assert_held(fence->lock); + + if (__test_and_clear_bit(DMA_FENCE_CB_FLAG_WAITCOUNT_BIT, &cb->flags)) { + WARN_ON_ONCE(!fence->waitcount); + fence->waitcount--; + } +} + void __dma_fence_signal__timestamp(struct dma_fence *fence, ktime_t timestamp) { lockdep_assert_held(fence->lock); @@ -363,6 +382,7 @@ __dma_fence_signal__notify(struct dma_fence *fence, lockdep_assert_held(fence->lock); list_for_each_entry_safe(cur, tmp, list, node) { + decr_wait_count(fence, cur); INIT_LIST_HEAD(&cur->node); cur->func(fence, cur); } @@ -629,11 +649,44 @@ void dma_fence_enable_sw_signaling(struct dma_fence *fence) unsigned long flags; spin_lock_irqsave(fence->lock, flags); + fence->waitcount++; + WARN_ON_ONCE(!fence->waitcount); __dma_fence_enable_signaling(fence); spin_unlock_irqrestore(fence->lock, flags); } EXPORT_SYMBOL(dma_fence_enable_sw_signaling); +static int add_callback(struct dma_fence *fence, struct dma_fence_cb *cb, + dma_fence_func_t func, bool wait) +{ + unsigned long flags; + int ret = 0; + + __dma_fence_cb_init(cb, func); + + if (WARN_ON(!fence || !func)) + return -EINVAL; + + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + return -ENOENT; + + spin_lock_irqsave(fence->lock, flags); + + if (wait) + incr_wait_count(fence, cb); + + if (__dma_fence_enable_signaling(fence)) { + list_add_tail(&cb->node, &fence->cb_list); + } else { + decr_wait_count(fence, cb); + ret = -ENOENT; + } + + spin_unlock_irqrestore(fence->lock, flags); + + return ret; +} + /** * dma_fence_add_callback - add a callback to be called when the fence * is signaled @@ -659,31 +712,18 @@ EXPORT_SYMBOL(dma_fence_enable_sw_signaling); int dma_fence_add_callback(struct dma_fence *fence, struct dma_fence_cb *cb, dma_fence_func_t func) { - unsigned long flags; - int ret = 0; - - __dma_fence_cb_init(cb, func); - - if (WARN_ON(!fence || !func)) - return -EINVAL; - - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) - return -ENOENT; - - spin_lock_irqsave(fence->lock, flags); - - if (__dma_fence_enable_signaling(fence)) { - list_add_tail(&cb->node, &fence->cb_list); - } else { - ret = -ENOENT; - } - - spin_unlock_irqrestore(fence->lock, flags); - - return ret; + return add_callback(fence, cb, func, false); } EXPORT_SYMBOL(dma_fence_add_callback); +int dma_fence_add_wait_callback(struct dma_fence *fence, + struct dma_fence_cb *cb, + dma_fence_func_t func) +{ + return add_callback(fence, cb, func, true); +} +EXPORT_SYMBOL(dma_fence_add_wait_callback); + /** * dma_fence_get_status - returns the status upon completion * @fence: the dma_fence to query @@ -736,8 +776,10 @@ dma_fence_remove_callback(struct dma_fence *fence, struct dma_fence_cb *cb) spin_lock_irqsave(fence->lock, flags); ret = !list_empty(&cb->node); - if (ret) + if (ret) { + decr_wait_count(fence, cb); list_del_init(&cb->node); + } spin_unlock_irqrestore(fence->lock, flags); @@ -795,6 +837,7 @@ dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout) __dma_fence_cb_init(&cb.base, dma_fence_default_wait_cb); cb.task = current; + incr_wait_count(fence, &cb.base); list_add(&cb.base.node, &fence->cb_list); while (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) && ret > 0) { @@ -811,8 +854,10 @@ dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout) ret = -ERESTARTSYS; } - if (!list_empty(&cb.base.node)) + if (!list_empty(&cb.base.node)) { + decr_wait_count(fence, &cb.base); list_del(&cb.base.node); + } __set_current_state(TASK_RUNNING); out: @@ -890,8 +935,8 @@ dma_fence_wait_any_timeout(struct dma_fence **fences, uint32_t count, struct dma_fence *fence = fences[i]; cb[i].task = current; - if (dma_fence_add_callback(fence, &cb[i].base, - dma_fence_default_wait_cb)) { + if (dma_fence_add_wait_callback(fence, &cb[i].base, + dma_fence_default_wait_cb)) { /* This fence is already signaled */ if (idx) *idx = i; @@ -972,6 +1017,7 @@ dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops, fence->context = context; fence->seqno = seqno; fence->flags = 0UL; + fence->waitcount = 0; fence->error = 0; trace_dma_fence_init(fence); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index e971b153fda9..2693a0151a6b 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -218,7 +218,6 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) * until the background request retirement running every * second or two). */ - BUILD_BUG_ON(sizeof(rq->duration) > sizeof(rq->submitq)); dma_fence_add_callback(&rq->fence, &rq->duration.cb, duration); rq->duration.emitted = ktime_get(); } diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index 35933e0ae62c..2b696f9de276 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -92,6 +92,7 @@ struct dma_fence { u64 seqno; unsigned long flags; struct kref refcount; + unsigned int waitcount; int error; }; @@ -116,6 +117,11 @@ typedef void (*dma_fence_func_t)(struct dma_fence *fence, struct dma_fence_cb { struct list_head node; dma_fence_func_t func; + unsigned long flags; +}; + +enum dma_fence_cb_flag_bits { + DMA_FENCE_CB_FLAG_WAITCOUNT_BIT, }; /** @@ -381,6 +387,9 @@ signed long dma_fence_default_wait(struct dma_fence *fence, int dma_fence_add_callback(struct dma_fence *fence, struct dma_fence_cb *cb, dma_fence_func_t func); +int dma_fence_add_wait_callback(struct dma_fence *fence, + struct dma_fence_cb *cb, + dma_fence_func_t func); bool dma_fence_remove_callback(struct dma_fence *fence, struct dma_fence_cb *cb); void dma_fence_enable_sw_signaling(struct dma_fence *fence); @@ -532,6 +541,11 @@ static inline int dma_fence_get_status_locked(struct dma_fence *fence) int dma_fence_get_status(struct dma_fence *fence); +static inline unsigned int dma_fence_wait_count(struct dma_fence *fence) +{ + return fence->waitcount; +} + /** * dma_fence_set_error - flag an error condition on the fence * @fence: the dma_fence @@ -634,6 +648,7 @@ static inline void __dma_fence_cb_init(struct dma_fence_cb *cb, { INIT_LIST_HEAD(&cb->node); cb->func = func; + cb->flags = 0; } #endif /* __LINUX_DMA_FENCE_H */ -- 2.34.1