As AE4DMA offers multi-channel functionality, unlike PTDMA's single
queue, use the multi-queue support, which allows higher transfer speeds
than PTDMA, to achieve higher performance through the AE4DMA
workqueue-based mechanism.

Fixes: 69a47b16a51b ("dmaengine: ptdma: Extend ptdma to support multi-channel and version")
Signed-off-by: Basavaraj Natikar <Basavaraj.Natikar@xxxxxxx>
---
 drivers/dma/amd/ae4dma/ae4dma.h         |  2 +
 drivers/dma/amd/ptdma/ptdma-dmaengine.c | 90 ++++++++++++++++++++++++-
 2 files changed, 89 insertions(+), 3 deletions(-)

diff --git a/drivers/dma/amd/ae4dma/ae4dma.h b/drivers/dma/amd/ae4dma/ae4dma.h
index 265c5d436008..57f6048726bb 100644
--- a/drivers/dma/amd/ae4dma/ae4dma.h
+++ b/drivers/dma/amd/ae4dma/ae4dma.h
@@ -37,6 +37,8 @@
 #define AE4_DMA_VERSION			4
 #define CMD_AE4_DESC_DW0_VAL		2
 
+#define AE4_TIME_OUT			5000
+
 struct ae4_msix {
 	int msix_count;
 	struct msix_entry msix_entry[MAX_AE4_HW_QUEUES];
diff --git a/drivers/dma/amd/ptdma/ptdma-dmaengine.c b/drivers/dma/amd/ptdma/ptdma-dmaengine.c
index 35c84ec9608b..715ac3ae067b 100644
--- a/drivers/dma/amd/ptdma/ptdma-dmaengine.c
+++ b/drivers/dma/amd/ptdma/ptdma-dmaengine.c
@@ -198,8 +198,10 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
 {
 	struct dma_async_tx_descriptor *tx_desc;
 	struct virt_dma_desc *vd;
+	struct pt_device *pt;
 	unsigned long flags;
 
+	pt = chan->pt;
 	/* Loop over descriptors until one is found with commands */
 	do {
 		if (desc) {
@@ -217,7 +219,7 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
 
 		spin_lock_irqsave(&chan->vc.lock, flags);
 
-		if (desc) {
+		if (pt->ver != AE4_DMA_VERSION && desc) {
 			if (desc->status != DMA_COMPLETE) {
 				if (desc->status != DMA_ERROR)
 					desc->status = DMA_COMPLETE;
@@ -235,7 +237,7 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
 
 		spin_unlock_irqrestore(&chan->vc.lock, flags);
 
-		if (tx_desc) {
+		if (pt->ver != AE4_DMA_VERSION && tx_desc) {
 			dmaengine_desc_get_callback_invoke(tx_desc, NULL);
 			dma_run_dependencies(tx_desc);
 			vchan_vdesc_fini(vd);
@@ -245,11 +247,25 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
 	return NULL;
 }
 
+static inline bool ae4_core_queue_full(struct pt_cmd_queue *cmd_q)
+{
+	u32 front_wi = readl(cmd_q->reg_control + AE4_WR_IDX_OFF);
+	u32 rear_ri = readl(cmd_q->reg_control + AE4_RD_IDX_OFF);
+
+	if (((MAX_CMD_QLEN + front_wi - rear_ri) % MAX_CMD_QLEN) >= (MAX_CMD_QLEN - 1))
+		return true;
+
+	return false;
+}
+
 static void pt_cmd_callback(void *data, int err)
 {
 	struct pt_dma_desc *desc = data;
+	struct ae4_cmd_queue *ae4cmd_q;
 	struct dma_chan *dma_chan;
 	struct pt_dma_chan *chan;
+	struct ae4_device *ae4;
+	struct pt_device *pt;
 	int ret;
 
 	if (err == -EINPROGRESS)
@@ -257,11 +273,32 @@ static void pt_cmd_callback(void *data, int err)
 
 	dma_chan = desc->vd.tx.chan;
 	chan = to_pt_chan(dma_chan);
+	pt = chan->pt;
 
 	if (err)
 		desc->status = DMA_ERROR;
 
 	while (true) {
+		if (pt->ver == AE4_DMA_VERSION) {
+			ae4 = container_of(pt, struct ae4_device, pt);
+			ae4cmd_q = &ae4->ae4cmd_q[chan->id];
+
+			if (ae4cmd_q->q_cmd_count >= (CMD_Q_LEN - 1) ||
+			    ae4_core_queue_full(&ae4cmd_q->cmd_q)) {
+				wake_up(&ae4cmd_q->q_w);
+
+				if (wait_for_completion_timeout(&ae4cmd_q->cmp,
+								msecs_to_jiffies(AE4_TIME_OUT))
+								== 0) {
+					dev_err(pt->dev, "TIMEOUT %d:\n", ae4cmd_q->id);
+					break;
+				}
+
+				reinit_completion(&ae4cmd_q->cmp);
+				continue;
+			}
+		}
+
 		/* Check for DMA descriptor completion */
 		desc = pt_handle_active_desc(chan, desc);
 
@@ -296,6 +333,49 @@ static struct pt_dma_desc *pt_alloc_dma_desc(struct pt_dma_chan *chan,
 	return desc;
 }
 
+static void pt_cmd_callback_work(void *data, int err)
+{
+	struct dma_async_tx_descriptor *tx_desc;
+	struct pt_dma_desc *desc = data;
+	struct dma_chan *dma_chan;
+	struct virt_dma_desc *vd;
+	struct pt_dma_chan *chan;
+	unsigned long flags;
+
+	dma_chan = desc->vd.tx.chan;
+	chan = to_pt_chan(dma_chan);
+
+	if (err == -EINPROGRESS)
+		return;
+
+	tx_desc = &desc->vd.tx;
+	vd = &desc->vd;
+
+	if (err)
+		desc->status = DMA_ERROR;
+
+	spin_lock_irqsave(&chan->vc.lock, flags);
+	if (desc) {
+		if (desc->status != DMA_COMPLETE) {
+			if (desc->status != DMA_ERROR)
+				desc->status = DMA_COMPLETE;
+
+			dma_cookie_complete(tx_desc);
+			dma_descriptor_unmap(tx_desc);
+		} else {
+			tx_desc = NULL;
+		}
+	}
+	spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+	if (tx_desc) {
+		dmaengine_desc_get_callback_invoke(tx_desc, NULL);
+		dma_run_dependencies(tx_desc);
+		list_del(&desc->vd.node);
+		vchan_vdesc_fini(vd);
+	}
+}
+
 static struct pt_dma_desc *pt_create_desc(struct dma_chan *dma_chan,
 					  dma_addr_t dst,
 					  dma_addr_t src,
@@ -327,6 +407,7 @@ static struct pt_dma_desc *pt_create_desc(struct dma_chan *dma_chan,
 	desc->len = len;
 
 	if (pt->ver == AE4_DMA_VERSION) {
+		pt_cmd->pt_cmd_callback = pt_cmd_callback_work;
 		ae4 = container_of(pt, struct ae4_device, pt);
 		ae4cmd_q = &ae4->ae4cmd_q[chan->id];
 		mutex_lock(&ae4cmd_q->cmd_lock);
@@ -367,13 +448,16 @@ static void pt_issue_pending(struct dma_chan *dma_chan)
 {
 	struct pt_dma_chan *chan = to_pt_chan(dma_chan);
 	struct pt_dma_desc *desc;
+	struct pt_device *pt;
 	unsigned long flags;
 	bool engine_is_idle = true;
 
+	pt = chan->pt;
+
 	spin_lock_irqsave(&chan->vc.lock, flags);
 
 	desc = pt_next_dma_desc(chan);
-	if (desc)
+	if (desc && pt->ver != AE4_DMA_VERSION)
 		engine_is_idle = false;
 
 	vchan_issue_pending(&chan->vc);
-- 
2.25.1
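
A note on the new ae4_core_queue_full() helper (illustrative only, not part of
the patch): it treats the hardware write index and read index as positions in a
circular command queue and reports the queue as full once only one slot remains
free, so the write index can never catch up with the read index. Below is a
minimal user-space sketch of the same occupancy arithmetic, assuming a
hypothetical queue depth of 32 and hypothetical helper names
(queue_occupancy/queue_full); the driver itself reads the real indices from the
AE4_WR_IDX_OFF/AE4_RD_IDX_OFF registers.

#include <stdio.h>

#define MAX_CMD_QLEN	32	/* hypothetical queue depth, for illustration only */

/*
 * Same modular arithmetic as ae4_core_queue_full(): number of slots
 * currently in use between the read index and the write index of a
 * circular command queue.
 */
static unsigned int queue_occupancy(unsigned int wr_idx, unsigned int rd_idx)
{
	return (MAX_CMD_QLEN + wr_idx - rd_idx) % MAX_CMD_QLEN;
}

static int queue_full(unsigned int wr_idx, unsigned int rd_idx)
{
	/* "Full" is reported one slot early so wr_idx never overtakes rd_idx. */
	return queue_occupancy(wr_idx, rd_idx) >= (MAX_CMD_QLEN - 1);
}

int main(void)
{
	/* Write index has wrapped past the read index: 30 of 32 slots used. */
	printf("occupancy=%u full=%d\n", queue_occupancy(3, 5), queue_full(3, 5));

	/* Indices equal: the queue is empty. */
	printf("occupancy=%u full=%d\n", queue_occupancy(7, 7), queue_full(7, 7));

	return 0;
}

Compiled stand-alone with any C compiler, the two sample index pairs print an
occupancy of 30 (not yet full) and 0 (empty), which mirrors how pt_cmd_callback()
decides whether to wait on the queue completion before submitting more work.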