在 2018/12/3 21:28, Christian König 写道: > Am 03.12.18 um 14:18 schrieb Chunming Zhou: >> >> 在 2018/12/3 19:00, Christian König 写道: >>> Am 03.12.18 um 06:25 schrieb zhoucm1: >>>> >>>> On 2018年11月28日 22:50, Christian König wrote: >>>>> Lockless container implementation similar to a dma_fence_array, but >>>>> with >>>>> only two elements per node and automatic garbage collection. >>>>> >>>>> v2: properly document dma_fence_chain_for_each, add >>>>> dma_fence_chain_find_seqno, >>>>> drop prev reference during garbage collection if it's not a >>>>> chain fence. >>>>> >>>>> Signed-off-by: Christian König <christian.koenig@xxxxxxx> >>>>> --- >>>>> drivers/dma-buf/Makefile | 3 +- >>>>> drivers/dma-buf/dma-fence-chain.c | 235 >>>>> ++++++++++++++++++++++++++++++++++++++ >>>>> include/linux/dma-fence-chain.h | 79 +++++++++++++ >>>>> 3 files changed, 316 insertions(+), 1 deletion(-) >>>>> create mode 100644 drivers/dma-buf/dma-fence-chain.c >>>>> create mode 100644 include/linux/dma-fence-chain.h >>>>> >>>>> diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile >>>>> index 0913a6ccab5a..1f006e083eb9 100644 >>>>> --- a/drivers/dma-buf/Makefile >>>>> +++ b/drivers/dma-buf/Makefile >>>>> @@ -1,4 +1,5 @@ >>>>> -obj-y := dma-buf.o dma-fence.o dma-fence-array.o reservation.o >>>>> seqno-fence.o >>>>> +obj-y := dma-buf.o dma-fence.o dma-fence-array.o dma-fence-chain.o \ >>>>> + reservation.o seqno-fence.o >>>>> obj-$(CONFIG_SYNC_FILE) += sync_file.o >>>>> obj-$(CONFIG_SW_SYNC) += sw_sync.o sync_debug.o >>>>> obj-$(CONFIG_UDMABUF) += udmabuf.o >>>>> diff --git a/drivers/dma-buf/dma-fence-chain.c >>>>> b/drivers/dma-buf/dma-fence-chain.c >>>>> new file mode 100644 >>>>> index 000000000000..de05101fc48d >>>>> --- /dev/null >>>>> +++ b/drivers/dma-buf/dma-fence-chain.c >>>>> @@ -0,0 +1,235 @@ >>>>> +/* >>>>> + * fence-chain: chain fences together in a timeline >>>>> + * >>>>> + * Copyright (C) 2018 Advanced Micro Devices, Inc. >>>>> + * Authors: >>>>> + * Christian König <christian.koenig@xxxxxxx> >>>>> + * >>>>> + * This program is free software; you can redistribute it and/or >>>>> modify it >>>>> + * under the terms of the GNU General Public License version 2 as >>>>> published by >>>>> + * the Free Software Foundation. >>>>> + * >>>>> + * This program is distributed in the hope that it will be useful, >>>>> but WITHOUT >>>>> + * ANY WARRANTY; without even the implied warranty of >>>>> MERCHANTABILITY or >>>>> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public >>>>> License for >>>>> + * more details. >>>>> + */ >>>>> + >>>>> +#include <linux/dma-fence-chain.h> >>>>> + >>>>> +static bool dma_fence_chain_enable_signaling(struct dma_fence >>>>> *fence); >>>>> + >>>>> +/** >>>>> + * dma_fence_chain_get_prev - use RCU to get a reference to the >>>>> previous fence >>>>> + * @chain: chain node to get the previous node from >>>>> + * >>>>> + * Use dma_fence_get_rcu_safe to get a reference to the previous >>>>> fence of the >>>>> + * chain node. >>>>> + */ >>>>> +static struct dma_fence *dma_fence_chain_get_prev(struct >>>>> dma_fence_chain *chain) >>>>> +{ >>>>> + struct dma_fence *prev; >>>>> + >>>>> + rcu_read_lock(); >>>>> + prev = dma_fence_get_rcu_safe(&chain->prev); >>>>> + rcu_read_unlock(); >>>>> + return prev; >>>>> +} >>>>> + >>>>> +/** >>>>> + * dma_fence_chain_walk - chain walking function >>>>> + * @fence: current chain node >>>>> + * >>>>> + * Walk the chain to the next node. Returns the next fence or NULL >>>>> if we are at >>>>> + * the end of the chain. Garbage collects chain nodes which are >>>>> already >>>>> + * signaled. >>>>> + */ >>>>> +struct dma_fence *dma_fence_chain_walk(struct dma_fence *fence) >>>>> +{ >>>>> + struct dma_fence_chain *chain, *prev_chain; >>>>> + struct dma_fence *prev, *replacement, *tmp; >>>>> + >>>>> + chain = to_dma_fence_chain(fence); >>>>> + if (!chain) { >>>>> + dma_fence_put(fence); >>>>> + return NULL; >>>>> + } >>>>> + >>>>> + while ((prev = dma_fence_chain_get_prev(chain))) { >>>>> + >>>>> + prev_chain = to_dma_fence_chain(prev); >>>>> + if (prev_chain) { >>>>> + if (!dma_fence_is_signaled(prev_chain->fence)) >>>>> + break; >>>>> + >>>>> + replacement = dma_fence_chain_get_prev(prev_chain); >>>>> + } else { >>>>> + if (!dma_fence_is_signaled(prev)) >>>>> + break; >>>>> + >>>>> + replacement = NULL; >>>>> + } >>>>> + >>>>> + tmp = cmpxchg(&chain->prev, prev, replacement); >>>>> + if (tmp == prev) >>>>> + dma_fence_put(tmp); >>>>> + else >>>>> + dma_fence_put(replacement); >>>>> + dma_fence_put(prev); >>>>> + } >>>>> + >>>>> + dma_fence_put(fence); >>>>> + return prev; >>>>> +} >>>>> +EXPORT_SYMBOL(dma_fence_chain_walk); >>>>> + >>>>> +/** >>>>> + * dma_fence_chain_find_seqno - find fence chain node by seqno >>>>> + * @pfence: pointer to the chain node where to start >>>>> + * @seqno: the sequence number to search for >>>>> + * >>>>> + * Advance the fence pointer to the chain node which will signal >>>>> this sequence >>>>> + * number. If no sequence number is provided then this is a no-op. >>>>> + * >>>>> + * Returns EINVAL if the fence is not a chain node or the sequence >>>>> number has >>>>> + * not yet advanced far enough. >>>>> + */ >>>>> +int dma_fence_chain_find_seqno(struct dma_fence **pfence, uint64_t >>>>> seqno) >>>>> +{ >>>>> + struct dma_fence_chain *chain; >>>>> + >>>>> + if (!seqno) >>>>> + return 0; >>>>> + >>>>> + chain = to_dma_fence_chain(*pfence); >>>>> + if (!chain || chain->base.seqno < seqno) >>>>> + return -EINVAL; >>>>> + >>>>> + dma_fence_chain_for_each(*pfence) { >>>>> + if ((*pfence)->context != chain->base.context || >>>>> + to_dma_fence_chain(*pfence)->prev_seqno < seqno) >>>>> + break; >>>>> + } >>>>> + dma_fence_put(&chain->base); >>>>> + >>>>> + return 0; >>>>> +} >>>>> +EXPORT_SYMBOL(dma_fence_chain_find_seqno); >>>>> + >>>>> +static const char *dma_fence_chain_get_driver_name(struct dma_fence >>>>> *fence) >>>>> +{ >>>>> + return "dma_fence_chain"; >>>>> +} >>>>> + >>>>> +static const char *dma_fence_chain_get_timeline_name(struct >>>>> dma_fence *fence) >>>>> +{ >>>>> + return "unbound"; >>>>> +} >>>>> + >>>>> +static void dma_fence_chain_irq_work(struct irq_work *work) >>>>> +{ >>>>> + struct dma_fence_chain *chain; >>>>> + >>>>> + chain = container_of(work, typeof(*chain), work); >>>>> + >>>>> + /* Try to rearm the callback */ >>>>> + if (!dma_fence_chain_enable_signaling(&chain->base)) >>>>> + /* Ok, we are done. No more unsignaled fences left */ >>>>> + dma_fence_signal(&chain->base); >>>>> +} >>>>> + >>>>> +static void dma_fence_chain_cb(struct dma_fence *f, struct >>>>> dma_fence_cb *cb) >>>>> +{ >>>>> + struct dma_fence_chain *chain; >>>>> + >>>>> + chain = container_of(cb, typeof(*chain), cb); >>>>> + irq_work_queue(&chain->work); >>>>> + dma_fence_put(f); >>>>> +} >>>>> + >>>>> +static bool dma_fence_chain_enable_signaling(struct dma_fence >>>>> *fence) >>>>> +{ >>>>> + struct dma_fence_chain *head = to_dma_fence_chain(fence); >>>>> + >>>>> + dma_fence_chain_for_each(fence) { >>>>> + struct dma_fence_chain *chain = to_dma_fence_chain(fence); >>>>> + struct dma_fence *f = chain ? chain->fence : fence; >>>>> + >>>>> + if (!dma_fence_add_callback(f, &head->cb, >>>>> dma_fence_chain_cb)) >>>>> + return true; >>>>> + } >>>>> + return false; >>>>> +} >>>>> + >>>>> +static bool dma_fence_chain_signaled(struct dma_fence *fence) >>>>> +{ >>>>> + dma_fence_chain_for_each(fence) { >>>>> + struct dma_fence_chain *chain = to_dma_fence_chain(fence); >>>>> + struct dma_fence *f = chain ? chain->fence : fence; >>>>> + >>>>> + if (!dma_fence_is_signaled(f)) { >>>>> + dma_fence_put(fence); >>>>> + return false; >>>>> + } >>>>> + } >>>>> + >>>>> + return true; >>>>> +} >>>>> + >>>>> +static void dma_fence_chain_release(struct dma_fence *fence) >>>>> +{ >>>>> + struct dma_fence_chain *chain = to_dma_fence_chain(fence); >>>>> + >>>>> + dma_fence_put(chain->prev); >>>>> + dma_fence_put(chain->fence); >>>>> + dma_fence_free(fence); >>>>> +} >>>>> + >>>>> +const struct dma_fence_ops dma_fence_chain_ops = { >>>>> + .get_driver_name = dma_fence_chain_get_driver_name, >>>>> + .get_timeline_name = dma_fence_chain_get_timeline_name, >>>>> + .enable_signaling = dma_fence_chain_enable_signaling, >>>>> + .signaled = dma_fence_chain_signaled, >>>>> + .release = dma_fence_chain_release, >>>>> +}; >>>>> +EXPORT_SYMBOL(dma_fence_chain_ops); >>>>> + >>>>> +/** >>>>> + * dma_fence_chain_init - initialize a fence chain >>>>> + * @chain: the chain node to initialize >>>>> + * @prev: the previous fence >>>>> + * @fence: the current fence >>>>> + * >>>>> + * Initialize a new chain node and either start a new chain or add >>>>> the node to >>>>> + * the existing chain of the previous fence. >>>>> + */ >>>>> +void dma_fence_chain_init(struct dma_fence_chain *chain, >>>>> + struct dma_fence *prev, >>>>> + struct dma_fence *fence, >>>>> + uint64_t seqno) >>>>> +{ >>>>> + struct dma_fence_chain *prev_chain = to_dma_fence_chain(prev); >>>>> + uint64_t context; >>>>> + >>>>> + spin_lock_init(&chain->lock); >>>>> + chain->prev = prev; >>>>> + chain->fence = fence; >>>>> + chain->prev_seqno = 0; >>>>> + init_irq_work(&chain->work, dma_fence_chain_irq_work); >>>>> + >>>>> + /* Try to reuse the context of the previous chain node. */ >>>>> + if (prev_chain && seqno > prev->seqno && >>>>> + __dma_fence_is_later(seqno, prev->seqno)) { >>>> As your patch#1 makes __dma_fence_is_later only be valid for 32bit, >>>> we cannot use it for 64bit here, we should remove it from here, just >>>> compare seqno directly. >>> That is intentional. We must make sure that the number both increments >>> as 64bit number as well as not wraps around as 32bit number. >>> >>> In other words the largest difference between two sequence numbers >>> userspace is allowed to submit is 1<<31. >> Why? no one can make sure that, application users would only think it is >> an uint64 sequence nubmer, and they can signal any advanced point. I >> already see umd guys writing timeline test use max_uint64-1 as a final >> signal. >> We shouldn't add this limitation here. > > We need to be backward compatible to hardware which can only do 32bit > signaling with the dma-fence implementation. I see that, you already explained that before. but can't we just grep low 32bit of seqno only when 32bit hardware try to use? then we can make dma_fence_later use 64bit comparation. > > Otherwise dma_fence_later() could return an inconsistent result and > break at other places. > > So if userspace wants to use more than 1<<31 difference between > sequence numbers we need to push back on this. It's rare case, but I don't think we can convince them add this limitation. So we cannot add this limitation here. -David > > Christian. > >> >> -David >> >>>>> + context = prev->context; >>>>> + chain->prev_seqno = prev->seqno; >>>>> + } else { >>>>> + context = dma_fence_context_alloc(1); >>>>> + /* Make sure that we always have a valid sequence number. */ >>>>> + if (prev_chain) >>>>> + seqno = max(prev->seqno, seqno); >>>> I still cannot judge if this case is proper, but I prefer we just >>>> drop it when seqno is <= last seqno. >>>> we can image that when we enable export/import, we could mess them. >>>> So we shouldn't change timeline existing signal point any time. >>> Yeah, but we can't lose fences either. This is the most defensive >>> approach I can think of. >>> >>> E.g. we still ad the fence, but start a new timeline so all queries >>> for all previous sequence number will always wait for everything. >>> >>> Regards, >>> Christian. >>> >>>> -David >>>>> + } >>>>> + >>>>> + dma_fence_init(&chain->base, &dma_fence_chain_ops, >>>>> + &chain->lock, context, seqno); >>>>> +} >>>>> +EXPORT_SYMBOL(dma_fence_chain_init); >>>>> diff --git a/include/linux/dma-fence-chain.h >>>>> b/include/linux/dma-fence-chain.h >>>>> new file mode 100644 >>>>> index 000000000000..3bb0efd6bc65 >>>>> --- /dev/null >>>>> +++ b/include/linux/dma-fence-chain.h >>>>> @@ -0,0 +1,79 @@ >>>>> +/* >>>>> + * fence-chain: chain fences together in a timeline >>>>> + * >>>>> + * Copyright (C) 2018 Advanced Micro Devices, Inc. >>>>> + * Authors: >>>>> + * Christian König <christian.koenig@xxxxxxx> >>>>> + * >>>>> + * This program is free software; you can redistribute it and/or >>>>> modify it >>>>> + * under the terms of the GNU General Public License version 2 as >>>>> published by >>>>> + * the Free Software Foundation. >>>>> + * >>>>> + * This program is distributed in the hope that it will be useful, >>>>> but WITHOUT >>>>> + * ANY WARRANTY; without even the implied warranty of >>>>> MERCHANTABILITY or >>>>> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public >>>>> License for >>>>> + * more details. >>>>> + */ >>>>> + >>>>> +#ifndef __LINUX_DMA_FENCE_CHAIN_H >>>>> +#define __LINUX_DMA_FENCE_CHAIN_H >>>>> + >>>>> +#include <linux/dma-fence.h> >>>>> +#include <linux/irq_work.h> >>>>> + >>>>> +/** >>>>> + * struct dma_fence_chain - fence to represent an node of a fence >>>>> chain >>>>> + * @base: fence base class >>>>> + * @lock: spinlock for fence handling >>>>> + * @prev: previous fence of the chain >>>>> + * @prev_seqno: original previous seqno before garbage collection >>>>> + * @fence: encapsulated fence >>>>> + * @cb: callback structure for signaling >>>>> + * @work: irq work item for signaling >>>>> + */ >>>>> +struct dma_fence_chain { >>>>> + struct dma_fence base; >>>>> + spinlock_t lock; >>>>> + struct dma_fence *prev; >>>>> + u64 prev_seqno; >>>>> + struct dma_fence *fence; >>>>> + struct dma_fence_cb cb; >>>>> + struct irq_work work; >>>>> +}; >>>>> + >>>>> +extern const struct dma_fence_ops dma_fence_chain_ops; >>>>> + >>>>> +/** >>>>> + * to_dma_fence_chain - cast a fence to a dma_fence_chain >>>>> + * @fence: fence to cast to a dma_fence_array >>>>> + * >>>>> + * Returns NULL if the fence is not a dma_fence_chain, >>>>> + * or the dma_fence_chain otherwise. >>>>> + */ >>>>> +static inline struct dma_fence_chain * >>>>> +to_dma_fence_chain(struct dma_fence *fence) >>>>> +{ >>>>> + if (!fence || fence->ops != &dma_fence_chain_ops) >>>>> + return NULL; >>>>> + >>>>> + return container_of(fence, struct dma_fence_chain, base); >>>>> +} >>>>> + >>>>> +/** >>>>> + * dma_fence_chain_for_each - iterate over all fences in chain >>>>> + * @fence: starting point as well as current fence >>>>> + * >>>>> + * Iterate over all fences in the chain. We keep a reference to the >>>>> current >>>>> + * fence while inside the loop which must be dropped when breaking >>>>> out. >>>>> + */ >>>>> +#define dma_fence_chain_for_each(fence) \ >>>>> + for >>>>> (dma_fence_get(fence);fence;fence=dma_fence_chain_walk(fence)) >>>>> + >>>>> +struct dma_fence *dma_fence_chain_walk(struct dma_fence *fence); >>>>> +int dma_fence_chain_find_seqno(struct dma_fence **pfence, uint64_t >>>>> seqno); >>>>> +void dma_fence_chain_init(struct dma_fence_chain *chain, >>>>> + struct dma_fence *prev, >>>>> + struct dma_fence *fence, >>>>> + uint64_t seqno); >>>>> + >>>>> +#endif /* __LINUX_DMA_FENCE_CHAIN_H */ >> _______________________________________________ >> amd-gfx mailing list >> amd-gfx@xxxxxxxxxxxxxxxxxxxxx >> https://lists.freedesktop.org/mailman/listinfo/amd-gfx > _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx