Add some rather sophisticated lockless garbage collection for dma_fence_chain objects. For this keep all initialized dma_fence_chain nodes an a queue and trigger garbage collection before a new one is allocated. Signed-off-by: Christian König <christian.koenig@xxxxxxx> --- drivers/dma-buf/dma-fence-chain.c | 160 +++++++++++++++++++++++++----- include/linux/dma-fence-chain.h | 5 + 2 files changed, 142 insertions(+), 23 deletions(-) diff --git a/drivers/dma-buf/dma-fence-chain.c b/drivers/dma-buf/dma-fence-chain.c index 1b4cb3e5cec9..c2f0b69eabb7 100644 --- a/drivers/dma-buf/dma-fence-chain.c +++ b/drivers/dma-buf/dma-fence-chain.c @@ -9,8 +9,53 @@ #include <linux/dma-fence-chain.h> +static struct dma_fence_chain __rcu *fifo_front; +static struct dma_fence_chain __rcu **fifo_back = &fifo_front; + static bool dma_fence_chain_enable_signaling(struct dma_fence *fence); +/** + * dma_fence_chain_enqueue - enqeue a chain node for garbage collection + * @chain: the chain node to enqueue + * + * Add the chain node to the end of the gc fifo. + */ +static void dma_fence_chain_enqueue(struct dma_fence_chain *chain) +{ + struct dma_fence_chain __rcu **tmp; + + RCU_INIT_POINTER(chain->next, NULL); + tmp = xchg((struct dma_fence_chain __force ***)&fifo_back, + &chain->next); + + /* This is intentionally unordered since we only need the fifo for gc */ + rcu_assign_pointer(*tmp, chain); +} + +/** + * dma_fence_chain_dequeue - deqeueue a chain node for garbage collection + * + * Remove the first chain node from the gc fifo while making sure that always + * keep at least one node on the fifo for lockless fifo implementation. + * Returns the dequeued chain node or NULL. + */ +static struct dma_fence_chain *dma_fence_chain_dequeue(void) +{ + struct dma_fence_chain *chain, *tmp; + + rcu_read_lock(); + chain = rcu_dereference(fifo_front); + /* Never dequeue the last chain node for lockless fifo */ + if (unlikely(!chain || !rcu_access_pointer(chain->next))) { + rcu_read_unlock(); + return NULL; + } + tmp = cmpxchg((struct dma_fence_chain __force **)&fifo_front, + chain, rcu_access_pointer(chain->next)); + rcu_read_unlock(); + return tmp == chain ? chain : NULL; +} + /** * dma_fence_chain_get_prev - use RCU to get a reference to the previous fence * @chain: chain node to get the previous node from @@ -28,6 +73,43 @@ static struct dma_fence *dma_fence_chain_get_prev(struct dma_fence_chain *chain) return prev; } +/** + * dma_fence_chain_try_replace - try to replace the prev node + * @chain: Chain node we try to replace prev. + * @prev: the old prev node + * + * Try to replace the previous chain node when it or its containing fence is + * signaled. Returns true if we tried, false if we need to wait. + */ +static bool dma_fence_chain_try_replace(struct dma_fence_chain *chain, + struct dma_fence *prev) +{ + struct dma_fence *replacement, *tmp; + struct dma_fence_chain *prev_chain; + + prev_chain = to_dma_fence_chain(prev); + if (prev_chain) { + if (!dma_fence_is_signaled(prev_chain->fence)) + return false; + + replacement = dma_fence_chain_get_prev(prev_chain); + } else { + if (!dma_fence_is_signaled(prev)) + return false; + + replacement = NULL; + } + + tmp = cmpxchg((struct dma_fence __force **)&chain->prev, prev, + replacement); + if (tmp == prev) + dma_fence_put(tmp); + else + dma_fence_put(replacement); + + return true; +} + /** * dma_fence_chain_walk - chain walking function * @fence: current chain node @@ -38,8 +120,8 @@ static struct dma_fence *dma_fence_chain_get_prev(struct dma_fence_chain *chain) */ struct dma_fence *dma_fence_chain_walk(struct dma_fence *fence) { - struct dma_fence_chain *chain, *prev_chain; - struct dma_fence *prev, *replacement, *tmp; + struct dma_fence_chain *chain; + struct dma_fence *prev; chain = to_dma_fence_chain(fence); if (!chain) { @@ -48,26 +130,8 @@ struct dma_fence *dma_fence_chain_walk(struct dma_fence *fence) } while ((prev = dma_fence_chain_get_prev(chain))) { - - prev_chain = to_dma_fence_chain(prev); - if (prev_chain) { - if (!dma_fence_is_signaled(prev_chain->fence)) - break; - - replacement = dma_fence_chain_get_prev(prev_chain); - } else { - if (!dma_fence_is_signaled(prev)) - break; - - replacement = NULL; - } - - tmp = cmpxchg((struct dma_fence __force **)&chain->prev, - prev, replacement); - if (tmp == prev) - dma_fence_put(tmp); - else - dma_fence_put(replacement); + if (!dma_fence_chain_try_replace(chain, prev)) + break; dma_fence_put(prev); } @@ -205,7 +269,21 @@ static void dma_fence_chain_release(struct dma_fence *fence) dma_fence_put(prev); dma_fence_put(chain->fence); - dma_fence_free(fence); + WRITE_ONCE(chain->fence, NULL); + + /* + * Don't garbage collect here to avoid recursion and potential stack + * overflow. + */ + chain = dma_fence_chain_dequeue(); + if (!chain) + return; + + if (kref_read(&chain->base.refcount) || + READ_ONCE(chain->fence)) + dma_fence_chain_enqueue(chain); + else + dma_fence_free(&chain->base); } const struct dma_fence_ops dma_fence_chain_ops = { @@ -218,6 +296,40 @@ const struct dma_fence_ops dma_fence_chain_ops = { }; EXPORT_SYMBOL(dma_fence_chain_ops); +/** + * dma_fence_chain_garbage_collect - cleanup chain nodes + * + * Do some garbage collection and try to release chain nodes. + */ +void dma_fence_chain_garbage_collect(void) +{ + struct dma_fence_chain *chain = dma_fence_chain_dequeue(); + + if (!chain) + return; + + if (!dma_fence_get_rcu(&chain->base)) { + /* Unused, check if it's also clean */ + if (likely(!READ_ONCE(chain->fence))) { + dma_fence_free(&chain->base); + return; + } + + } else { + struct dma_fence *prev; + + /* Used, do some chain walk */ + prev = dma_fence_chain_get_prev(chain); + if (prev) { + dma_fence_chain_try_replace(chain, prev); + dma_fence_put(prev); + } + dma_fence_put(&chain->base); + } + dma_fence_chain_enqueue(chain); +} +EXPORT_SYMBOL(dma_fence_chain_garbage_collect); + /** * dma_fence_chain_init - initialize a fence chain * @chain: the chain node to initialize @@ -254,5 +366,7 @@ void dma_fence_chain_init(struct dma_fence_chain *chain, dma_fence_init(&chain->base, &dma_fence_chain_ops, &chain->lock, context, seqno); + dma_fence_chain_enqueue(chain); } + EXPORT_SYMBOL(dma_fence_chain_init); diff --git a/include/linux/dma-fence-chain.h b/include/linux/dma-fence-chain.h index 5f45689a6d2e..b412b5396434 100644 --- a/include/linux/dma-fence-chain.h +++ b/include/linux/dma-fence-chain.h @@ -19,6 +19,7 @@ * @base: fence base class * @lock: spinlock for fence handling * @prev: previous fence of the chain + * @next: next chain node for garbage collection * @prev_seqno: original previous seqno before garbage collection * @fence: encapsulated fence * @cb: callback structure for signaling @@ -27,6 +28,7 @@ struct dma_fence_chain { struct dma_fence base; struct dma_fence __rcu *prev; + struct dma_fence_chain __rcu *next; u64 prev_seqno; struct dma_fence *fence; union { @@ -38,6 +40,8 @@ struct dma_fence_chain { extern const struct dma_fence_ops dma_fence_chain_ops; +void dma_fence_chain_garbage_collect(void); + /** * to_dma_fence_chain - cast a fence to a dma_fence_chain * @fence: fence to cast to a dma_fence_array @@ -61,6 +65,7 @@ to_dma_fence_chain(struct dma_fence *fence) */ static inline struct dma_fence_chain *dma_fence_chain_alloc(void) { + dma_fence_chain_garbage_collect(); return kmalloc(sizeof(struct dma_fence_chain), GFP_KERNEL); }; -- 2.25.1