On 16-10-20, 02:39, Sanjay R Mehta wrote:
> diff --git a/drivers/dma/ptdma/ptdma-dmaengine.c b/drivers/dma/ptdma/ptdma-dmaengine.c
> new file mode 100644
> index 0000000..9b9b77c
> --- /dev/null
> +++ b/drivers/dma/ptdma/ptdma-dmaengine.c
> @@ -0,0 +1,554 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * AMD Passthrough DMA device driver
> + * -- Based on the CCP driver
> + *
> + * Copyright (C) 2016,2020 Advanced Micro Devices, Inc.
> + *
> + * Author: Sanjay R Mehta <sanju.mehta@xxxxxxx>
> + * Author: Gary R Hook <gary.hook@xxxxxxx>
> + */
> +
> +#include "ptdma.h"
> +#include "../dmaengine.h"
> +#include "../virt-dma.h"
> +
> +#define PT_DMA_WIDTH(_mask)			\
> +({						\
> +	u64 mask = (_mask) + 1;			\
> +	(mask == 0) ? 64 : fls64(mask);		\

How would mask be 0 here?

> +static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
> +						 struct pt_dma_desc *desc)
> +{
> +	struct dma_async_tx_descriptor *tx_desc;
> +	struct virt_dma_desc *vd;
> +	unsigned long flags;
> +
> +	/* Loop over descriptors until one is found with commands */
> +	do {
> +		if (desc) {
> +			/* Remove the DMA command from the list and free it */
> +			pt_free_active_cmd(desc);
> +			if (!desc->issued_to_hw) {
> +				/* No errors, keep going */
> +				if (desc->status != DMA_ERROR)
> +					return desc;
> +				/* Error, free remaining commands and move on */
> +				pt_free_cmd_resources(desc->pt,
> +						      &desc->cmdlist);

This should be a single line.

> +			}
> +
> +			tx_desc = &desc->vd.tx;
> +			vd = &desc->vd;
> +		} else {
> +			tx_desc = NULL;
> +		}
> +
> +		spin_lock_irqsave(&chan->vc.lock, flags);
> +
> +		if (desc) {
> +			if (desc->status != DMA_ERROR)
> +				desc->status = DMA_COMPLETE;
> +
> +			dma_cookie_complete(tx_desc);
> +			dma_descriptor_unmap(tx_desc);
> +			list_del(&desc->vd.node);
> +		}
> +
> +		desc = pt_next_dma_desc(chan);
> +
> +		spin_unlock_irqrestore(&chan->vc.lock, flags);
> +
> +		if (tx_desc) {
> +			dmaengine_desc_get_callback_invoke(tx_desc, NULL);
> +			dma_run_dependencies(tx_desc);
> +			vchan_vdesc_fini(vd);
> +		}
> +	} while (desc);

So IIUC, this routine does two different things: one is finding the next
active desc, and the other is completing a desc and invoking its callback.
These are two different tasks, and I am not sure why both are handled here.
Can you split them up into separate routines? That may help make this
easier to read.
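Something along these lines, perhaps. A rough and untested sketch reusing
the helpers from your patch; the names pt_complete_desc() and
pt_get_next_desc() are made up:

/* Complete one finished descriptor and invoke its callback */
static void pt_complete_desc(struct pt_dma_chan *chan,
                             struct pt_dma_desc *desc)
{
        struct dma_async_tx_descriptor *tx_desc = &desc->vd.tx;
        unsigned long flags;

        spin_lock_irqsave(&chan->vc.lock, flags);
        if (desc->status != DMA_ERROR)
                desc->status = DMA_COMPLETE;
        dma_cookie_complete(tx_desc);
        dma_descriptor_unmap(tx_desc);
        list_del(&desc->vd.node);
        spin_unlock_irqrestore(&chan->vc.lock, flags);

        /* Run the callback and dependencies without the lock held */
        dmaengine_desc_get_callback_invoke(tx_desc, NULL);
        dma_run_dependencies(tx_desc);
        vchan_vdesc_fini(&desc->vd);
}

/* Fetch the next descriptor to work on, if any */
static struct pt_dma_desc *pt_get_next_desc(struct pt_dma_chan *chan)
{
        struct pt_dma_desc *desc;
        unsigned long flags;

        spin_lock_irqsave(&chan->vc.lock, flags);
        desc = pt_next_dma_desc(chan);
        spin_unlock_irqrestore(&chan->vc.lock, flags);

        return desc;
}

pt_handle_active_desc() would then reduce to a small loop that completes
the finished descriptor and fetches the next one.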
> +static struct pt_dma_desc *pt_create_desc(struct dma_chan *dma_chan,
> +					  struct scatterlist *dst_sg,
> +					  unsigned int dst_nents,
> +					  struct scatterlist *src_sg,
> +					  unsigned int src_nents,
> +					  unsigned long flags)
> +{
> +	struct pt_dma_chan *chan = to_pt_chan(dma_chan);
> +	struct pt_device *pt = chan->pt;
> +	struct pt_dma_desc *desc;
> +	struct pt_dma_cmd *cmd;
> +	struct pt_cmd *pt_cmd;
> +	struct pt_passthru_engine *pt_engine;
> +	unsigned int src_offset, src_len;
> +	unsigned int dst_offset, dst_len;
> +	unsigned int len;
> +	size_t total_len;
> +
> +	if (!dst_sg || !src_sg)
> +		return NULL;
> +
> +	if (!dst_nents || !src_nents)
> +		return NULL;
> +
> +	desc = pt_alloc_dma_desc(chan, flags);
> +	if (!desc)
> +		return NULL;
> +
> +	total_len = 0;
> +
> +	src_len = sg_dma_len(src_sg);
> +	src_offset = 0;
> +
> +	dst_len = sg_dma_len(dst_sg);
> +	dst_offset = 0;
> +
> +	while (true) {
> +		if (!src_len) {
> +			src_nents--;
> +			if (!src_nents)
> +				break;
> +
> +			src_sg = sg_next(src_sg);
> +			if (!src_sg)
> +				break;
> +
> +			src_len = sg_dma_len(src_sg);
> +			src_offset = 0;
> +			continue;
> +		}
> +
> +		if (!dst_len) {
> +			dst_nents--;
> +			if (!dst_nents)
> +				break;
> +
> +			dst_sg = sg_next(dst_sg);
> +			if (!dst_sg)
> +				break;
> +
> +			dst_len = sg_dma_len(dst_sg);
> +			dst_offset = 0;
> +			continue;
> +		}
> +
> +		len = min(dst_len, src_len);

Ah, why not use for_each_sg()?

> +
> +		cmd = pt_alloc_dma_cmd(chan);
> +		if (!cmd)
> +			goto err;
> +
> +		pt_cmd = &cmd->pt_cmd;
> +		pt_cmd->pt = chan->pt;
> +		pt_engine = &pt_cmd->passthru;
> +		pt_cmd->engine = PT_ENGINE_PASSTHRU;
> +		pt_engine->src_dma = sg_dma_address(src_sg) + src_offset;
> +		pt_engine->dst_dma = sg_dma_address(dst_sg) + dst_offset;
> +		pt_engine->src_len = len;
> +		pt_cmd->pt_cmd_callback = pt_cmd_callback;
> +		pt_cmd->data = desc;
> +
> +		list_add_tail(&cmd->entry, &desc->cmdlist);
> +
> +		total_len += len;
> +
> +		src_len -= len;
> +		src_offset += len;
> +
> +		dst_len -= len;
> +		dst_offset += len;
> +	}

Since you have both src and dst sgls, those need to be checked as well. But
where are the two lists coming from? Something does not sound right here.

> +static struct dma_async_tx_descriptor *
> +pt_prep_dma_memcpy(struct dma_chan *dma_chan, dma_addr_t dst,
> +		   dma_addr_t src, size_t len, unsigned long flags)
> +{
> +	struct scatterlist dst_sg, src_sg;
> +	struct pt_dma_desc *desc;
> +
> +	sg_init_table(&dst_sg, 1);
> +	sg_dma_address(&dst_sg) = dst;
> +	sg_dma_len(&dst_sg) = len;
> +
> +	sg_init_table(&src_sg, 1);
> +	sg_dma_address(&src_sg) = src;
> +	sg_dma_len(&src_sg) = len;

Why do you need this overhead? Why not just pass the addresses on here? You
create an sg list here and then unravel it again for descriptor creation.
Typically we would have a lower-level API which handles dma_addr_t, and
that could be called both from here and by the sgl users!
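i.e. something like the below. Again only a sketch; pt_create_desc_memcpy()
is a hypothetical dma_addr_t based variant of your descriptor creation, on
top of which the sgl path could then be built:

/* Hypothetical lower-level helper that takes dma_addr_t directly */
static struct pt_dma_desc *pt_create_desc_memcpy(struct pt_dma_chan *chan,
                                                 dma_addr_t dst,
                                                 dma_addr_t src,
                                                 size_t len,
                                                 unsigned long flags);

static struct dma_async_tx_descriptor *
pt_prep_dma_memcpy(struct dma_chan *dma_chan, dma_addr_t dst,
                   dma_addr_t src, size_t len, unsigned long flags)
{
        struct pt_dma_chan *chan = to_pt_chan(dma_chan);
        struct pt_dma_desc *desc;

        /* No scatterlist round trip, pass the addresses straight down */
        desc = pt_create_desc_memcpy(chan, dst, src, len, flags);
        if (!desc)
                return NULL;

        return &desc->vd.tx;
}

The sgl-based callers would then walk their scatterlists and call the same
helper once per chunk.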
> +static enum dma_status pt_tx_status(struct dma_chan *dma_chan,
> +				    dma_cookie_t cookie,
> +				    struct dma_tx_state *state)
> +{
> +	struct pt_dma_chan *chan = to_pt_chan(dma_chan);
> +	struct pt_dma_desc *desc;
> +	enum dma_status ret;
> +	unsigned long flags;
> +	struct virt_dma_desc *vd;
> +
> +	ret = dma_cookie_status(dma_chan, cookie, state);
> +	if (ret == DMA_COMPLETE)
> +		return ret;
> +
> +	spin_lock_irqsave(&chan->vc.lock, flags);
> +	vd = vchan_find_desc(&chan->vc, cookie);
> +	desc = vd ? to_pt_desc(vd) : NULL;
> +	ret = desc->status;
> +	spin_unlock_irqrestore(&chan->vc.lock, flags);

How does this block of code help? Also note that vchan_find_desc() can
return NULL here, in which case desc->status dereferences a NULL pointer.

> +int pt_dmaengine_register(struct pt_device *pt)
> +{
> +	struct pt_dma_chan *chan;
> +	struct dma_device *dma_dev = &pt->dma_dev;
> +	char *dma_cmd_cache_name;
> +	char *dma_desc_cache_name;
> +	int ret;
> +
> +	pt->pt_dma_chan = devm_kzalloc(pt->dev, sizeof(*pt->pt_dma_chan),
> +				       GFP_KERNEL);
> +	if (!pt->pt_dma_chan)
> +		return -ENOMEM;
> +
> +	dma_cmd_cache_name = devm_kasprintf(pt->dev, GFP_KERNEL,

Maybe use shorter variable names.

> +					    "%s-dmaengine-cmd-cache",
> +					    pt->name);
> +	if (!dma_cmd_cache_name)
> +		return -ENOMEM;
> +
> +	pt->dma_cmd_cache = kmem_cache_create(dma_cmd_cache_name,
> +					      sizeof(struct pt_dma_cmd),
> +					      sizeof(void *),
> +					      SLAB_HWCACHE_ALIGN, NULL);
> +	if (!pt->dma_cmd_cache)
> +		return -ENOMEM;
> +
> +	dma_desc_cache_name = devm_kasprintf(pt->dev, GFP_KERNEL,
> +					     "%s-dmaengine-desc-cache",
> +					     pt->name);
> +	if (!dma_desc_cache_name) {
> +		ret = -ENOMEM;
> +		goto err_cache;
> +	}
> +
> +	pt->dma_desc_cache = kmem_cache_create(dma_desc_cache_name,
> +					       sizeof(struct pt_dma_desc),
> +					       sizeof(void *),
> +					       SLAB_HWCACHE_ALIGN, NULL);
> +	if (!pt->dma_desc_cache) {
> +		ret = -ENOMEM;
> +		goto err_cache;
> +	}
> +
> +	dma_dev->dev = pt->dev;
> +	dma_dev->src_addr_widths = PT_DMA_WIDTH(dma_get_mask(pt->dev));
> +	dma_dev->dst_addr_widths = PT_DMA_WIDTH(dma_get_mask(pt->dev));
> +	dma_dev->directions = DMA_MEM_TO_MEM;
> +	dma_dev->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
> +	dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
> +	dma_cap_set(DMA_INTERRUPT, dma_dev->cap_mask);
> +	dma_cap_set(DMA_PRIVATE, dma_dev->cap_mask);

Why DMA_PRIVATE for a memcpy function?

-- 
~Vinod