Re: [PATCH 2/7] dmaengine: ae4dma: Add AMD ae4dma controller driver

On Tue, May 21, 2024 at 03:06:17PM +0530, Basavaraj Natikar wrote:
> 
> On 5/10/2024 11:46 PM, Frank Li wrote:
> > On Fri, May 10, 2024 at 01:50:48PM +0530, Basavaraj Natikar wrote:
> >> Add support for AMD AE4DMA controller. It performs high-bandwidth
> >> memory to memory and IO copy operation. Device commands are managed
> >> via a circular queue of 'descriptors', each of which specifies source
> >> and destination addresses for copying a single buffer of data.
> >>
> >> Reviewed-by: Raju Rangoju <Raju.Rangoju@xxxxxxx>
> >> Signed-off-by: Basavaraj Natikar <Basavaraj.Natikar@xxxxxxx>
> >> ---
> >>  MAINTAINERS                         |   6 +
> >>  drivers/dma/amd/Kconfig             |   1 +
> >>  drivers/dma/amd/Makefile            |   1 +
> >>  drivers/dma/amd/ae4dma/Kconfig      |  13 ++
> >>  drivers/dma/amd/ae4dma/Makefile     |  10 ++
> >>  drivers/dma/amd/ae4dma/ae4dma-dev.c | 206 ++++++++++++++++++++++++++++
> >>  drivers/dma/amd/ae4dma/ae4dma-pci.c | 195 ++++++++++++++++++++++++++
> >>  drivers/dma/amd/ae4dma/ae4dma.h     |  77 +++++++++++
> >>  drivers/dma/amd/common/amd_dma.h    |  26 ++++
> >>  9 files changed, 535 insertions(+)
> >>  create mode 100644 drivers/dma/amd/ae4dma/Kconfig
> >>  create mode 100644 drivers/dma/amd/ae4dma/Makefile
> >>  create mode 100644 drivers/dma/amd/ae4dma/ae4dma-dev.c
> >>  create mode 100644 drivers/dma/amd/ae4dma/ae4dma-pci.c
> >>  create mode 100644 drivers/dma/amd/ae4dma/ae4dma.h
> >>  create mode 100644 drivers/dma/amd/common/amd_dma.h
> >>
> >> diff --git a/MAINTAINERS b/MAINTAINERS
> >> index b190efda33ba..45f2140093b6 100644
> >> --- a/MAINTAINERS
> >> +++ b/MAINTAINERS
> >> @@ -909,6 +909,12 @@ L:	linux-edac@xxxxxxxxxxxxxxx
> >>  S:	Supported
> >>  F:	drivers/ras/amd/atl/*
> >>  
> >> +AMD AE4DMA DRIVER
> >> +M:	Basavaraj Natikar <Basavaraj.Natikar@xxxxxxx>
> >> +L:	dmaengine@xxxxxxxxxxxxxxx
> >> +S:	Maintained
> >> +F:	drivers/dma/amd/ae4dma/
> >> +
> >>  AMD AXI W1 DRIVER
> >>  M:	Kris Chaplin <kris.chaplin@xxxxxxx>
> >>  R:	Thomas Delev <thomas.delev@xxxxxxx>
> >> diff --git a/drivers/dma/amd/Kconfig b/drivers/dma/amd/Kconfig
> >> index 8246b463bcf7..8c25a3ed6b94 100644
> >> --- a/drivers/dma/amd/Kconfig
> >> +++ b/drivers/dma/amd/Kconfig
> >> @@ -3,3 +3,4 @@
> >>  # AMD DMA Drivers
> >>  
> >>  source "drivers/dma/amd/ptdma/Kconfig"
> >> +source "drivers/dma/amd/ae4dma/Kconfig"
> >> diff --git a/drivers/dma/amd/Makefile b/drivers/dma/amd/Makefile
> >> index dd7257ba7e06..8049b06a9ff5 100644
> >> --- a/drivers/dma/amd/Makefile
> >> +++ b/drivers/dma/amd/Makefile
> >> @@ -4,3 +4,4 @@
> >>  #
> >>  
> >>  obj-$(CONFIG_AMD_PTDMA) += ptdma/
> >> +obj-$(CONFIG_AMD_AE4DMA) += ae4dma/
> >> diff --git a/drivers/dma/amd/ae4dma/Kconfig b/drivers/dma/amd/ae4dma/Kconfig
> >> new file mode 100644
> >> index 000000000000..cf8db4dac98d
> >> --- /dev/null
> >> +++ b/drivers/dma/amd/ae4dma/Kconfig
> >> @@ -0,0 +1,13 @@
> >> +# SPDX-License-Identifier: GPL-2.0
> >> +config AMD_AE4DMA
> >> +	tristate  "AMD AE4DMA Engine"
> >> +	depends on X86_64 && PCI
> >> +	select DMA_ENGINE
> >> +	select DMA_VIRTUAL_CHANNELS
> >> +	help
> >> +	  Enable support for the AMD AE4DMA controller. This controller
> >> +	  provides DMA capabilities to perform high bandwidth memory to
> >> +	  memory and IO copy operations. It performs DMA transfer through
> >> +	  queue-based descriptor management. This DMA controller is intended
> >> +	  to be used with AMD Non-Transparent Bridge devices and not for
> >> +	  general purpose peripheral DMA.
> >> diff --git a/drivers/dma/amd/ae4dma/Makefile b/drivers/dma/amd/ae4dma/Makefile
> >> new file mode 100644
> >> index 000000000000..e918f85a80ec
> >> --- /dev/null
> >> +++ b/drivers/dma/amd/ae4dma/Makefile
> >> @@ -0,0 +1,10 @@
> >> +# SPDX-License-Identifier: GPL-2.0
> >> +#
> >> +# AMD AE4DMA driver
> >> +#
> >> +
> >> +obj-$(CONFIG_AMD_AE4DMA) += ae4dma.o
> >> +
> >> +ae4dma-objs := ae4dma-dev.o
> >> +
> >> +ae4dma-$(CONFIG_PCI) += ae4dma-pci.o
> >> diff --git a/drivers/dma/amd/ae4dma/ae4dma-dev.c b/drivers/dma/amd/ae4dma/ae4dma-dev.c
> >> new file mode 100644
> >> index 000000000000..fc33d2056af2
> >> --- /dev/null
> >> +++ b/drivers/dma/amd/ae4dma/ae4dma-dev.c
> >> @@ -0,0 +1,206 @@
> >> +// SPDX-License-Identifier: GPL-2.0
> >> +/*
> >> + * AMD AE4DMA driver
> >> + *
> >> + * Copyright (c) 2024, Advanced Micro Devices, Inc.
> >> + * All Rights Reserved.
> >> + *
> >> + * Author: Basavaraj Natikar <Basavaraj.Natikar@xxxxxxx>
> >> + */
> >> +
> >> +#include "ae4dma.h"
> >> +
> >> +static unsigned int max_hw_q = 1;
> >> +module_param(max_hw_q, uint, 0444);
> >> +MODULE_PARM_DESC(max_hw_q, "max hw queues supported by engine (any non-zero value, default: 1)");
> > Is this read from a hardware register? You put it in a global variable.
> > What about a system with two different DMA controllers, where one's
> > max_hw_q is 1 and the other's is 2?
> 
> Yes, this global value configures the default hardware register to 1. Since
> all DMA controllers are identical, they will all have the same value set.

Even if it is the same now, I still prefer to put it in the device table's
driver_data:

+static const struct pci_device_id ae4_pci_table[] = {
+	{ PCI_VDEVICE(AMD, 0x14C8), MAX_HW_Q},
				    ^^^^^^^^

+	{ PCI_VDEVICE(AMD, 0x14DC), ...},
+	{ PCI_VDEVICE(AMD, 0x149B), ...},
+	/* Last entry must be zero */
+	{ 0, }

So if a new design increases the queue number in the future,
you just need to add one line here.
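
Untested sketch of the full mechanism; AE4_MAX_HW_Q_V1 and ae4_probe_common()
are made-up names here, the driver_data plumbing is the only point:

#define AE4_MAX_HW_Q_V1		1

static const struct pci_device_id ae4_pci_table[] = {
	{ PCI_VDEVICE(AMD, 0x14C8), AE4_MAX_HW_Q_V1 },
	{ PCI_VDEVICE(AMD, 0x14DC), AE4_MAX_HW_Q_V1 },
	{ PCI_VDEVICE(AMD, 0x149B), AE4_MAX_HW_Q_V1 },
	/* Last entry must be zero */
	{ 0, }
};

static int ae4_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	/* Per-device queue count comes from the match entry, not a module param */
	unsigned int max_hw_q = id->driver_data;

	return ae4_probe_common(pdev, max_hw_q);
}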

Frank

> 
> >
> >> +
> >> +static char *ae4_error_codes[] = {
> >> +	"",
> >> +	"ERR 01: INVALID HEADER DW0",
> >> +	"ERR 02: INVALID STATUS",
> >> +	"ERR 03: INVALID LENGTH - 4 BYTE ALIGNMENT",
> >> +	"ERR 04: INVALID SRC ADDR - 4 BYTE ALIGNMENT",
> >> +	"ERR 05: INVALID DST ADDR - 4 BYTE ALIGNMENT",
> >> +	"ERR 06: INVALID ALIGNMENT",
> >> +	"ERR 07: INVALID DESCRIPTOR",
> >> +};
> >> +
> >> +static void ae4_log_error(struct pt_device *d, int e)
> >> +{
> >> +	if (e <= 7)
> >> +		dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", ae4_error_codes[e], e);
> >> +	else if (e > 7 && e <= 15)
> >> +		dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "INVALID DESCRIPTOR", e);
> >> +	else if (e > 15 && e <= 31)
> >> +		dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "INVALID DESCRIPTOR", e);
> >> +	else if (e > 31 && e <= 63)
> >> +		dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "INVALID DESCRIPTOR", e);
> >> +	else if (e > 63 && e <= 127)
> >> +		dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "PTE ERROR", e);
> >> +	else if (e > 127 && e <= 255)
> >> +		dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "PTE ERROR", e);
> >> +	else
> >> +		dev_info(d->dev, "Unknown AE4DMA error");
> >> +}
> >> +
> >> +static void ae4_check_status_error(struct ae4_cmd_queue *ae4cmd_q, int idx)
> >> +{
> >> +	struct pt_cmd_queue *cmd_q = &ae4cmd_q->cmd_q;
> >> +	struct ae4dma_desc desc;
> >> +	u8 status;
> >> +
> >> +	memcpy(&desc, &cmd_q->qbase[idx], sizeof(struct ae4dma_desc));
> >> +	/* Synchronize ordering */
> >> +	mb();
> > Is dma_wmb() enough?
> 
> Sure, I will change it to dma_rmb(), which is enough for this scenario.
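
(For reference, the documented coherent-memory pattern is flag first,
payload after the barrier -- a rough sketch against this code, assuming
qbase[] is typed as struct ae4dma_desc here:

	status = READ_ONCE(cmd_q->qbase[idx].dw1.status);
	dma_rmb();	/* order the status read before the payload reads */
	memcpy(&desc, &cmd_q->qbase[idx], sizeof(desc));
)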
> 
> >
> >> +	status = desc.dw1.status;
> >> +	if (status && status != AE4_DESC_COMPLETED) {
> >> +		cmd_q->cmd_error = desc.dw1.err_code;
> >> +		if (cmd_q->cmd_error)
> >> +			ae4_log_error(cmd_q->pt, cmd_q->cmd_error);
> >> +	}
> >> +}
> >> +
> >> +static void ae4_pending_work(struct work_struct *work)
> >> +{
> >> +	struct ae4_cmd_queue *ae4cmd_q = container_of(work, struct ae4_cmd_queue, p_work.work);
> >> +	struct pt_cmd_queue *cmd_q = &ae4cmd_q->cmd_q;
> >> +	struct pt_cmd *cmd;
> >> +	u32 cridx, dridx;
> >> +
> >> +	while (true) {
> >> +		wait_event_interruptible(ae4cmd_q->q_w,
> >> +					 ((atomic64_read(&ae4cmd_q->done_cnt)) <
> >> +					   atomic64_read(&ae4cmd_q->intr_cnt)));
> > wait_event_interruptible_timeout()? To avoid a potential deadlock.
> 
> A thread is created and started for each queue initially. These threads wait
> so that any DMA operation completion is handled quickly. If there are no DMA
> operations, the threads remain idle, but there won't be a deadlock.
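
(If you do keep the endless loop, a bounded wait at least lets the thread
notice teardown -- rough sketch, the 1000 ms value and the stop check are
placeholders:

	long left;

	left = wait_event_interruptible_timeout(ae4cmd_q->q_w,
			atomic64_read(&ae4cmd_q->done_cnt) <
			atomic64_read(&ae4cmd_q->intr_cnt),
			msecs_to_jiffies(1000));
	if (!left)
		continue;	/* timed out: recheck stop/teardown state */
)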
> 
> >
> >> +
> >> +		atomic64_inc(&ae4cmd_q->done_cnt);
> >> +
> >> +		mutex_lock(&ae4cmd_q->cmd_lock);
> >> +
> >> +		cridx = readl(cmd_q->reg_control + 0x0C);
> >> +		dridx = atomic_read(&ae4cmd_q->dridx);
> >> +
> >> +		while ((dridx != cridx) && !list_empty(&ae4cmd_q->cmd)) {
> >> +			cmd = list_first_entry(&ae4cmd_q->cmd, struct pt_cmd, entry);
> >> +			list_del(&cmd->entry);
> >> +
> >> +			ae4_check_status_error(ae4cmd_q, dridx);
> >> +			cmd->pt_cmd_callback(cmd->data, cmd->ret);
> >> +
> >> +			atomic64_dec(&ae4cmd_q->q_cmd_count);
> >> +			dridx = (dridx + 1) % CMD_Q_LEN;
> >> +			atomic_set(&ae4cmd_q->dridx, dridx);
> >> +			/* Synchronize ordering */
> >> +			mb();
> >> +
> >> +			complete_all(&ae4cmd_q->cmp);
> >> +		}
> >> +
> >> +		mutex_unlock(&ae4cmd_q->cmd_lock);
> >> +	}
> >> +}
> >> +
> >> +static irqreturn_t ae4_core_irq_handler(int irq, void *data)
> >> +{
> >> +	struct ae4_cmd_queue *ae4cmd_q = data;
> >> +	struct pt_cmd_queue *cmd_q;
> >> +	struct pt_device *pt;
> >> +	u32 status;
> >> +
> >> +	cmd_q = &ae4cmd_q->cmd_q;
> >> +	pt = cmd_q->pt;
> >> +
> >> +	pt->total_interrupts++;
> >> +	atomic64_inc(&ae4cmd_q->intr_cnt);
> >> +
> >> +	wake_up(&ae4cmd_q->q_w);
> >> +
> >> +	status = readl(cmd_q->reg_control + 0x14);
> >> +	if (status & BIT(0)) {
> >> +		status &= GENMASK(31, 1);
> >> +		writel(status, cmd_q->reg_control + 0x14);
> >> +	}
> >> +
> >> +	return IRQ_HANDLED;
> >> +}
> >> +
> >> +void ae4_destroy_work(struct ae4_device *ae4)
> >> +{
> >> +	struct ae4_cmd_queue *ae4cmd_q;
> >> +	int i;
> >> +
> >> +	for (i = 0; i < ae4->cmd_q_count; i++) {
> >> +		ae4cmd_q = &ae4->ae4cmd_q[i];
> >> +
> >> +		if (!ae4cmd_q->pws)
> >> +			break;
> >> +
> >> +		cancel_delayed_work(&ae4cmd_q->p_work);
> > do you need cancel_delayed_work_sync()?
> 
> Sure, I will change it to cancel_delayed_work_sync().
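
(I.e. in ae4_destroy_work():

	cancel_delayed_work_sync(&ae4cmd_q->p_work);
	destroy_workqueue(ae4cmd_q->pws);

so a handler still running on another CPU is guaranteed to have finished
before the workqueue is destroyed.)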
> 
> >
> >> +		destroy_workqueue(ae4cmd_q->pws);
> >> +	}
> >> +}
> >> +
> >> +int ae4_core_init(struct ae4_device *ae4)
> >> +{
> >> +	struct pt_device *pt = &ae4->pt;
> >> +	struct ae4_cmd_queue *ae4cmd_q;
> >> +	struct device *dev = pt->dev;
> >> +	struct pt_cmd_queue *cmd_q;
> >> +	int i, ret = 0;
> >> +
> >> +	writel(max_hw_q, pt->io_regs);
> >> +
> >> +	for (i = 0; i < max_hw_q; i++) {
> >> +		ae4cmd_q = &ae4->ae4cmd_q[i];
> >> +		ae4cmd_q->id = ae4->cmd_q_count;
> >> +		ae4->cmd_q_count++;
> >> +
> >> +		cmd_q = &ae4cmd_q->cmd_q;
> >> +		cmd_q->pt = pt;
> >> +
> >> +		/* Preset some register values (Q size is 32byte (0x20)) */
> >> +		cmd_q->reg_control = pt->io_regs + ((i + 1) * 0x20);
> >> +
> >> +		ret = devm_request_irq(dev, ae4->ae4_irq[i], ae4_core_irq_handler, 0,
> >> +				       dev_name(pt->dev), ae4cmd_q);
> >> +		if (ret)
> >> +			return ret;
> >> +
> >> +		cmd_q->qsize = Q_SIZE(sizeof(struct ae4dma_desc));
> >> +
> >> +		cmd_q->qbase = dmam_alloc_coherent(dev, cmd_q->qsize, &cmd_q->qbase_dma,
> >> +						   GFP_KERNEL);
> >> +		if (!cmd_q->qbase)
> >> +			return -ENOMEM;
> >> +	}
> >> +
> >> +	for (i = 0; i < ae4->cmd_q_count; i++) {
> >> +		ae4cmd_q = &ae4->ae4cmd_q[i];
> >> +
> >> +		cmd_q = &ae4cmd_q->cmd_q;
> >> +
> >> +		/* Preset some register values (Q size is 32byte (0x20)) */
> >> +		cmd_q->reg_control = pt->io_regs + ((i + 1) * 0x20);
> >> +
> >> +		/* Update the device registers with queue information. */
> >> +		writel(CMD_Q_LEN, cmd_q->reg_control + 0x08);
> >> +
> >> +		cmd_q->qdma_tail = cmd_q->qbase_dma;
> >> +		writel(lower_32_bits(cmd_q->qdma_tail), cmd_q->reg_control + 0x18);
> >> +		writel(upper_32_bits(cmd_q->qdma_tail), cmd_q->reg_control + 0x1C);
> >> +
> >> +		INIT_LIST_HEAD(&ae4cmd_q->cmd);
> >> +		init_waitqueue_head(&ae4cmd_q->q_w);
> >> +
> >> +		ae4cmd_q->pws = alloc_ordered_workqueue("ae4dma_%d", WQ_MEM_RECLAIM, ae4cmd_q->id);
> > Can an existing workqueue match your requirement?
> 
> Separate workqueues for each queue, compared to an existing shared workqueue,
> enhance performance by enabling load balancing across queues, ensuring DMA
> command execution even under memory pressure, and maintaining strict
> isolation between tasks in different queues.
> 
> >
> > Frank
> >
> >> +		if (!ae4cmd_q->pws) {
> >> +			ae4_destroy_work(ae4);
> >> +			return -ENOMEM;
> >> +		}
> >> +		INIT_DELAYED_WORK(&ae4cmd_q->p_work, ae4_pending_work);
> >> +		queue_delayed_work(ae4cmd_q->pws, &ae4cmd_q->p_work,  usecs_to_jiffies(100));
> >> +
> >> +		init_completion(&ae4cmd_q->cmp);
> >> +	}
> >> +
> >> +	return ret;
> >> +}
> >> diff --git a/drivers/dma/amd/ae4dma/ae4dma-pci.c b/drivers/dma/amd/ae4dma/ae4dma-pci.c
> >> new file mode 100644
> >> index 000000000000..4cd537af757d
> >> --- /dev/null
> >> +++ b/drivers/dma/amd/ae4dma/ae4dma-pci.c
> >> @@ -0,0 +1,195 @@
> >> +// SPDX-License-Identifier: GPL-2.0
> >> +/*
> >> + * AMD AE4DMA driver
> >> + *
> >> + * Copyright (c) 2024, Advanced Micro Devices, Inc.
> >> + * All Rights Reserved.
> >> + *
> >> + * Author: Basavaraj Natikar <Basavaraj.Natikar@xxxxxxx>
> >> + */
> >> +
> >> +#include "ae4dma.h"
> >> +
> >> +static int ae4_get_msi_irq(struct ae4_device *ae4)
> >> +{
> >> +	struct pt_device *pt = &ae4->pt;
> >> +	struct device *dev = pt->dev;
> >> +	struct pci_dev *pdev;
> >> +	int ret, i;
> >> +
> >> +	pdev = to_pci_dev(dev);
> >> +	ret = pci_enable_msi(pdev);
> >> +	if (ret)
> >> +		return ret;
> >> +
> >> +	for (i = 0; i < MAX_AE4_HW_QUEUES; i++)
> >> +		ae4->ae4_irq[i] = pdev->irq;
> >> +
> >> +	return 0;
> >> +}
> >> +
> >> +static int ae4_get_msix_irqs(struct ae4_device *ae4)
> >> +{
> >> +	struct ae4_msix *ae4_msix = ae4->ae4_msix;
> >> +	struct pt_device *pt = &ae4->pt;
> >> +	struct device *dev = pt->dev;
> >> +	struct pci_dev *pdev;
> >> +	int v, i, ret;
> >> +
> >> +	pdev = to_pci_dev(dev);
> >> +
> >> +	for (v = 0; v < ARRAY_SIZE(ae4_msix->msix_entry); v++)
> >> +		ae4_msix->msix_entry[v].entry = v;
> >> +
> >> +	ret = pci_enable_msix_range(pdev, ae4_msix->msix_entry, 1, v);
> >> +	if (ret < 0)
> >> +		return ret;
> >> +
> >> +	ae4_msix->msix_count = ret;
> >> +
> >> +	for (i = 0; i < MAX_AE4_HW_QUEUES; i++)
> >> +		ae4->ae4_irq[i] = ae4_msix->msix_entry[i].vector;
> >> +
> >> +	return 0;
> >> +}
> >> +
> >> +static int ae4_get_irqs(struct ae4_device *ae4)
> >> +{
> >> +	struct pt_device *pt = &ae4->pt;
> >> +	struct device *dev = pt->dev;
> >> +	int ret;
> >> +
> >> +	ret = ae4_get_msix_irqs(ae4);
> >> +	if (!ret)
> >> +		return 0;
> >> +
> >> +	/* Couldn't get MSI-X vectors, try MSI */
> >> +	dev_err(dev, "could not enable MSI-X (%d), trying MSI\n", ret);
> >> +	ret = ae4_get_msi_irq(ae4);
> >> +	if (!ret)
> >> +		return 0;
> >> +
> >> +	/* Couldn't get MSI interrupt */
> >> +	dev_err(dev, "could not enable MSI (%d)\n", ret);
> >> +
> >> +	return ret;
> >> +}
> >> +
> >> +static void ae4_free_irqs(struct ae4_device *ae4)
> >> +{
> >> +	struct ae4_msix *ae4_msix;
> >> +	struct pci_dev *pdev;
> >> +	struct pt_device *pt;
> >> +	struct device *dev;
> >> +	int i;
> >> +
> >> +	if (ae4) {
> >> +		pt = &ae4->pt;
> >> +		dev = pt->dev;
> >> +		pdev = to_pci_dev(dev);
> >> +
> >> +		ae4_msix = ae4->ae4_msix;
> >> +		if (ae4_msix && ae4_msix->msix_count)
> >> +			pci_disable_msix(pdev);
> >> +		else if (pdev->irq)
> >> +			pci_disable_msi(pdev);
> >> +
> >> +		for (i = 0; i < MAX_AE4_HW_QUEUES; i++)
> >> +			ae4->ae4_irq[i] = 0;
> >> +	}
> >> +}
> >> +
> >> +static void ae4_deinit(struct ae4_device *ae4)
> >> +{
> >> +	ae4_free_irqs(ae4);
> >> +}
> >> +
> >> +static int ae4_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> >> +{
> >> +	struct device *dev = &pdev->dev;
> >> +	struct ae4_device *ae4;
> >> +	struct pt_device *pt;
> >> +	int bar_mask;
> >> +	int ret = 0;
> >> +
> >> +	ae4 = devm_kzalloc(dev, sizeof(*ae4), GFP_KERNEL);
> >> +	if (!ae4)
> >> +		return -ENOMEM;
> >> +
> >> +	ae4->ae4_msix = devm_kzalloc(dev, sizeof(struct ae4_msix), GFP_KERNEL);
> >> +	if (!ae4->ae4_msix)
> >> +		return -ENOMEM;
> >> +
> >> +	ret = pcim_enable_device(pdev);
> >> +	if (ret)
> >> +		goto ae4_error;
> >> +
> >> +	bar_mask = pci_select_bars(pdev, IORESOURCE_MEM);
> >> +	ret = pcim_iomap_regions(pdev, bar_mask, "ae4dma");
> >> +	if (ret)
> >> +		goto ae4_error;
> >> +
> >> +	pt = &ae4->pt;
> >> +	pt->dev = dev;
> >> +
> >> +	pt->io_regs = pcim_iomap_table(pdev)[0];
> >> +	if (!pt->io_regs) {
> >> +		ret = -ENOMEM;
> >> +		goto ae4_error;
> >> +	}
> >> +
> >> +	ret = ae4_get_irqs(ae4);
> >> +	if (ret)
> >> +		goto ae4_error;
> >> +
> >> +	pci_set_master(pdev);
> >> +
> >> +	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
> >> +	if (ret) {
> >> +		ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
> >> +		if (ret)
> >> +			goto ae4_error;
> >> +	}
> > No need to fall back to 32-bit; it never returns failure when the mask is >= 32 bits.
> >
> > Detail see: 
> > https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?id=f7ae20f2fc4e6a5e32f43c4fa2acab3281a61c81
> >
> > if (support_48bit)
> > 	dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48))
> > else
> > 	dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32))
> >
> > You decide support_48bit from a hardware register or from the PID/DID.
> 
> Sure, I will keep only this line: dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48)).
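
(So the probe path reduces to:

	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
	if (ret)
		goto ae4_error;

assuming every supported device ID is 48-bit capable.)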
> 
> >
> >
> >> +
> >> +	dev_set_drvdata(dev, ae4);
> >> +
> >> +	ret = ae4_core_init(ae4);
> >> +	if (ret)
> >> +		goto ae4_error;
> >> +
> >> +	return 0;
> >> +
> >> +ae4_error:
> >> +	ae4_deinit(ae4);
> >> +
> >> +	return ret;
> >> +}
> >> +
> >> +static void ae4_pci_remove(struct pci_dev *pdev)
> >> +{
> >> +	struct ae4_device *ae4 = dev_get_drvdata(&pdev->dev);
> >> +
> >> +	ae4_destroy_work(ae4);
> >> +	ae4_deinit(ae4);
> >> +}
> >> +
> >> +static const struct pci_device_id ae4_pci_table[] = {
> >> +	{ PCI_VDEVICE(AMD, 0x14C8), },
> >> +	{ PCI_VDEVICE(AMD, 0x14DC), },
> >> +	{ PCI_VDEVICE(AMD, 0x149B), },
> >> +	/* Last entry must be zero */
> >> +	{ 0, }
> >> +};
> >> +MODULE_DEVICE_TABLE(pci, ae4_pci_table);
> >> +
> >> +static struct pci_driver ae4_pci_driver = {
> >> +	.name = "ae4dma",
> >> +	.id_table = ae4_pci_table,
> >> +	.probe = ae4_pci_probe,
> >> +	.remove = ae4_pci_remove,
> >> +};
> >> +
> >> +module_pci_driver(ae4_pci_driver);
> >> +
> >> +MODULE_LICENSE("GPL");
> >> +MODULE_DESCRIPTION("AMD AE4DMA driver");
> >> diff --git a/drivers/dma/amd/ae4dma/ae4dma.h b/drivers/dma/amd/ae4dma/ae4dma.h
> >> new file mode 100644
> >> index 000000000000..24b1253ad570
> >> --- /dev/null
> >> +++ b/drivers/dma/amd/ae4dma/ae4dma.h
> >> @@ -0,0 +1,77 @@
> >> +/* SPDX-License-Identifier: GPL-2.0 */
> >> +/*
> >> + * AMD AE4DMA driver
> >> + *
> >> + * Copyright (c) 2024, Advanced Micro Devices, Inc.
> >> + * All Rights Reserved.
> >> + *
> >> + * Author: Basavaraj Natikar <Basavaraj.Natikar@xxxxxxx>
> >> + */
> >> +#ifndef __AE4DMA_H__
> >> +#define __AE4DMA_H__
> >> +
> >> +#include "../common/amd_dma.h"
> >> +
> >> +#define MAX_AE4_HW_QUEUES		16
> >> +
> >> +#define AE4_DESC_COMPLETED		0x3
> >> +
> >> +struct ae4_msix {
> >> +	int msix_count;
> >> +	struct msix_entry msix_entry[MAX_AE4_HW_QUEUES];
> >> +};
> >> +
> >> +struct ae4_cmd_queue {
> >> +	struct ae4_device *ae4;
> >> +	struct pt_cmd_queue cmd_q;
> >> +	struct list_head cmd;
> >> +	/* protect command operations */
> >> +	struct mutex cmd_lock;
> >> +	struct delayed_work p_work;
> >> +	struct workqueue_struct *pws;
> >> +	struct completion cmp;
> >> +	wait_queue_head_t q_w;
> >> +	atomic64_t intr_cnt;
> >> +	atomic64_t done_cnt;
> >> +	atomic64_t q_cmd_count;
> >> +	atomic_t dridx;
> >> +	unsigned int id;
> >> +};
> >> +
> >> +union dwou {
> >> +	u32 dw0;
> >> +	struct dword0 {
> >> +	u8	byte0;
> >> +	u8	byte1;
> >> +	u16	timestamp;
> >> +	} dws;
> >> +};
> >> +
> >> +struct dword1 {
> >> +	u8	status;
> >> +	u8	err_code;
> >> +	u16	desc_id;
> >> +};
> >> +
> >> +struct ae4dma_desc {
> >> +	union dwou dwouv;
> >> +	struct dword1 dw1;
> >> +	u32 length;
> >> +	u32 rsvd;
> >> +	u32 src_hi;
> >> +	u32 src_lo;
> >> +	u32 dst_hi;
> >> +	u32 dst_lo;
> >> +};
> >> +
> >> +struct ae4_device {
> >> +	struct pt_device pt;
> >> +	struct ae4_msix *ae4_msix;
> >> +	struct ae4_cmd_queue ae4cmd_q[MAX_AE4_HW_QUEUES];
> >> +	unsigned int ae4_irq[MAX_AE4_HW_QUEUES];
> >> +	unsigned int cmd_q_count;
> >> +};
> >> +
> >> +int ae4_core_init(struct ae4_device *ae4);
> >> +void ae4_destroy_work(struct ae4_device *ae4);
> >> +#endif
> >> diff --git a/drivers/dma/amd/common/amd_dma.h b/drivers/dma/amd/common/amd_dma.h
> >> new file mode 100644
> >> index 000000000000..31c35b3bc94b
> >> --- /dev/null
> >> +++ b/drivers/dma/amd/common/amd_dma.h
> >> @@ -0,0 +1,26 @@
> >> +/* SPDX-License-Identifier: GPL-2.0 */
> >> +/*
> >> + * AMD DMA Driver common
> >> + *
> >> + * Copyright (c) 2024, Advanced Micro Devices, Inc.
> >> + * All Rights Reserved.
> >> + *
> >> + * Author: Basavaraj Natikar <Basavaraj.Natikar@xxxxxxx>
> >> + */
> >> +
> >> +#ifndef AMD_DMA_H
> >> +#define AMD_DMA_H
> >> +
> >> +#include <linux/device.h>
> >> +#include <linux/dmaengine.h>
> >> +#include <linux/pci.h>
> >> +#include <linux/spinlock.h>
> >> +#include <linux/mutex.h>
> >> +#include <linux/list.h>
> >> +#include <linux/wait.h>
> >> +#include <linux/dmapool.h>
> > Order these alphabetically.
> 
> Sure, I will change it accordingly.
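
For completeness, the alphabetized list would be:

	#include <linux/device.h>
	#include <linux/dmaengine.h>
	#include <linux/dmapool.h>
	#include <linux/list.h>
	#include <linux/mutex.h>
	#include <linux/pci.h>
	#include <linux/spinlock.h>
	#include <linux/wait.h>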
> 
> Thanks,
> --
> Basavaraj
> 
> >
> >> +
> >> +#include "../ptdma/ptdma.h"
> >> +#include "../../virt-dma.h"
> >> +
> >> +#endif
> >> -- 
> >> 2.25.1
> >>
> 



