On 03-02-25, 21:55, Basavaraj Natikar wrote:
> As AE4DMA offers multi-channel functionality compared to PTDMA's single
> queue, utilize multi-queue, which supports higher speeds than PTDMA, to
> achieve higher performance using the AE4DMA workqueue based mechanism.
>
> Fixes: 69a47b16a51b ("dmaengine: ptdma: Extend ptdma to support multi-channel and version")

Why is this a fix, again?

> Signed-off-by: Basavaraj Natikar <Basavaraj.Natikar@xxxxxxx>
> ---
>  drivers/dma/amd/ae4dma/ae4dma.h         |  2 +
>  drivers/dma/amd/ptdma/ptdma-dmaengine.c | 90 ++++++++++++++++++++++++-
>  2 files changed, 89 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/dma/amd/ae4dma/ae4dma.h b/drivers/dma/amd/ae4dma/ae4dma.h
> index 265c5d436008..57f6048726bb 100644
> --- a/drivers/dma/amd/ae4dma/ae4dma.h
> +++ b/drivers/dma/amd/ae4dma/ae4dma.h
> @@ -37,6 +37,8 @@
>  #define AE4_DMA_VERSION 4
>  #define CMD_AE4_DESC_DW0_VAL 2
>
> +#define AE4_TIME_OUT 5000
> +
>  struct ae4_msix {
>  	int msix_count;
>  	struct msix_entry msix_entry[MAX_AE4_HW_QUEUES];
> diff --git a/drivers/dma/amd/ptdma/ptdma-dmaengine.c b/drivers/dma/amd/ptdma/ptdma-dmaengine.c
> index 35c84ec9608b..715ac3ae067b 100644
> --- a/drivers/dma/amd/ptdma/ptdma-dmaengine.c
> +++ b/drivers/dma/amd/ptdma/ptdma-dmaengine.c
> @@ -198,8 +198,10 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
>  {
>  	struct dma_async_tx_descriptor *tx_desc;
>  	struct virt_dma_desc *vd;
> +	struct pt_device *pt;
>  	unsigned long flags;
>
> +	pt = chan->pt;
>  	/* Loop over descriptors until one is found with commands */
>  	do {
>  		if (desc) {
> @@ -217,7 +219,7 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
>
>  		spin_lock_irqsave(&chan->vc.lock, flags);
>
> -		if (desc) {
> +		if (pt->ver != AE4_DMA_VERSION && desc) {
>  			if (desc->status != DMA_COMPLETE) {
>  				if (desc->status != DMA_ERROR)
>  					desc->status = DMA_COMPLETE;
> @@ -235,7 +237,7 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
>
>  	spin_unlock_irqrestore(&chan->vc.lock, flags);
>
> -	if (tx_desc) {
> +	if (pt->ver != AE4_DMA_VERSION && tx_desc) {

Why should this handling be different for AE4_DMA_VERSION?
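My reading of the rest of the patch (an assumption on my part, the
changelog does not spell it out) is that AE4 descriptors are meant to be
completed later from pt_cmd_callback_work() instead, so completing them
here as well would invoke the client callback twice for the same cookie.
If that is indeed the intent, please document it at both version checks,
roughly:

	/*
	 * AE4 completes descriptors in pt_cmd_callback_work(), not
	 * here; completing them here too would run the client
	 * callback twice for the same cookie.
	 */
	if (pt->ver != AE4_DMA_VERSION && desc) {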
>  			dmaengine_desc_get_callback_invoke(tx_desc, NULL);
>  			dma_run_dependencies(tx_desc);
>  			vchan_vdesc_fini(vd);
> @@ -245,11 +247,25 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
>  	return NULL;
>  }
>
> +static inline bool ae4_core_queue_full(struct pt_cmd_queue *cmd_q)
> +{
> +	u32 front_wi = readl(cmd_q->reg_control + AE4_WR_IDX_OFF);
> +	u32 rear_ri = readl(cmd_q->reg_control + AE4_RD_IDX_OFF);
> +
> +	if (((MAX_CMD_QLEN + front_wi - rear_ri) % MAX_CMD_QLEN) >= (MAX_CMD_QLEN - 1))
> +		return true;
> +
> +	return false;
> +}
> +
>  static void pt_cmd_callback(void *data, int err)
>  {
>  	struct pt_dma_desc *desc = data;
> +	struct ae4_cmd_queue *ae4cmd_q;
>  	struct dma_chan *dma_chan;
>  	struct pt_dma_chan *chan;
> +	struct ae4_device *ae4;
> +	struct pt_device *pt;
>  	int ret;
>
>  	if (err == -EINPROGRESS)
> @@ -257,11 +273,32 @@ static void pt_cmd_callback(void *data, int err)
>
>  	dma_chan = desc->vd.tx.chan;
>  	chan = to_pt_chan(dma_chan);
> +	pt = chan->pt;
>
>  	if (err)
>  		desc->status = DMA_ERROR;
>
>  	while (true) {
> +		if (pt->ver == AE4_DMA_VERSION) {
> +			ae4 = container_of(pt, struct ae4_device, pt);
> +			ae4cmd_q = &ae4->ae4cmd_q[chan->id];
> +
> +			if (ae4cmd_q->q_cmd_count >= (CMD_Q_LEN - 1) ||
> +			    ae4_core_queue_full(&ae4cmd_q->cmd_q)) {
> +				wake_up(&ae4cmd_q->q_w);
> +
> +				if (wait_for_completion_timeout(&ae4cmd_q->cmp,
> +								msecs_to_jiffies(AE4_TIME_OUT))
> +						== 0) {
> +					dev_err(pt->dev, "TIMEOUT %d:\n", ae4cmd_q->id);
> +					break;
> +				}
> +
> +				reinit_completion(&ae4cmd_q->cmp);
> +				continue;
> +			}
> +		}
> +
>  		/* Check for DMA descriptor completion */
>  		desc = pt_handle_active_desc(chan, desc);
>
> @@ -296,6 +333,49 @@ static struct pt_dma_desc *pt_alloc_dma_desc(struct pt_dma_chan *chan,
>  	return desc;
>  }
>
> +static void pt_cmd_callback_work(void *data, int err)
> +{
> +	struct dma_async_tx_descriptor *tx_desc;
> +	struct pt_dma_desc *desc = data;
> +	struct dma_chan *dma_chan;
> +	struct virt_dma_desc *vd;
> +	struct pt_dma_chan *chan;
> +	unsigned long flags;
> +
> +	dma_chan = desc->vd.tx.chan;
> +	chan = to_pt_chan(dma_chan);
> +
> +	if (err == -EINPROGRESS)
> +		return;
> +
> +	tx_desc = &desc->vd.tx;
> +	vd = &desc->vd;
> +
> +	if (err)
> +		desc->status = DMA_ERROR;
> +
> +	spin_lock_irqsave(&chan->vc.lock, flags);
> +	if (desc) {
> +		if (desc->status != DMA_COMPLETE) {
> +			if (desc->status != DMA_ERROR)
> +				desc->status = DMA_COMPLETE;
> +
> +			dma_cookie_complete(tx_desc);
> +			dma_descriptor_unmap(tx_desc);
> +		} else {
> +			tx_desc = NULL;
> +		}
> +	}
> +	spin_unlock_irqrestore(&chan->vc.lock, flags);
> +
> +	if (tx_desc) {
> +		dmaengine_desc_get_callback_invoke(tx_desc, NULL);
> +		dma_run_dependencies(tx_desc);
> +		list_del(&desc->vd.node);
> +		vchan_vdesc_fini(vd);
> +	}
> +}

Why do we have a callback like this in the driver...?
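A side note on ae4_core_queue_full() above: the test is the usual
read/write-index ring arithmetic, with one slot left unused so that a
full ring can be told apart from an empty one. A minimal standalone
sketch of the same computation (QLEN is a stand-in value, not the
hardware's actual MAX_CMD_QLEN):

	#include <stdio.h>

	#define QLEN 32	/* stand-in ring size, not the real MAX_CMD_QLEN */

	/* Slots in use between read index ri and write index wi. */
	static unsigned int occupancy(unsigned int wi, unsigned int ri)
	{
		return (QLEN + wi - ri) % QLEN;
	}

	int main(void)
	{
		printf("%u\n", occupancy(5, 3));	/* 2 slots used, not full */
		printf("%u\n", occupancy(2, 3));	/* wrapped: 31 >= QLEN - 1, full */
		return 0;
	}

In other words, the ring is declared full one slot early, which is what
the >= (MAX_CMD_QLEN - 1) bound encodes.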
> +
>  static struct pt_dma_desc *pt_create_desc(struct dma_chan *dma_chan,
>  					  dma_addr_t dst,
>  					  dma_addr_t src,
> @@ -327,6 +407,7 @@ static struct pt_dma_desc *pt_create_desc(struct dma_chan *dma_chan,
>  	desc->len = len;
>
>  	if (pt->ver == AE4_DMA_VERSION) {
> +		pt_cmd->pt_cmd_callback = pt_cmd_callback_work;
>  		ae4 = container_of(pt, struct ae4_device, pt);
>  		ae4cmd_q = &ae4->ae4cmd_q[chan->id];
>  		mutex_lock(&ae4cmd_q->cmd_lock);
> @@ -367,13 +448,16 @@ static void pt_issue_pending(struct dma_chan *dma_chan)
>  {
>  	struct pt_dma_chan *chan = to_pt_chan(dma_chan);
>  	struct pt_dma_desc *desc;
> +	struct pt_device *pt;
>  	unsigned long flags;
>  	bool engine_is_idle = true;
>
> +	pt = chan->pt;
> +
>  	spin_lock_irqsave(&chan->vc.lock, flags);
>
>  	desc = pt_next_dma_desc(chan);
> -	if (desc)
> +	if (desc && pt->ver != AE4_DMA_VERSION)
>  		engine_is_idle = false;
>
>  	vchan_issue_pending(&chan->vc);
> --
> 2.25.1

--
~Vinod