On 05-03-24, 13:49, Lizhi Hou wrote:
> From: Nishad Saraf <nishads@xxxxxxx>
>
> Adds driver to enable PCIe board which uses AMD QDMA (the Queue-based
> Direct Memory Access) subsystem. For example, Xilinx Alveo V70 AI
> Accelerator devices.
> https://www.xilinx.com/applications/data-center/v70.html
>
> The QDMA subsystem is used in conjunction with the PCI Express IP block
> to provide high performance data transfer between host memory and the
> card's DMA subsystem.
>
>             +-------+       +-------+       +-----------+
>    PCIe     |       |       |       |       |           |
>    Tx/Rx    |       |       |       |  AXI  |           |
>  <=======>  | PCIE  | <===> | QDMA  | <====>| User Logic|
>             |       |       |       |       |           |
>             +-------+       +-------+       +-----------+
>
> The primary mechanism to transfer data using the QDMA is for the QDMA
> engine to operate on instructions (descriptors) provided by the host
> operating system. Using the descriptors, the QDMA can move data in both
> the Host to Card (H2C) direction, or the Card to Host (C2H) direction.
> The QDMA provides a per-queue basis option whether DMA traffic goes
> to an AXI4 memory map (MM) interface or to an AXI4-Stream interface.
>
> The hardware detail is provided by
> https://docs.xilinx.com/r/en-US/pg302-qdma
>
> Implements dmaengine APIs to support MM DMA transfers.
> - probe the available DMA channels > - use dma_slave_map for channel lookup > - use virtual channel to manage dmaengine tx descriptors > - implement device_prep_slave_sg callback to handle host scatter gather > list > - implement descriptor metadata operations to set device address for DMA > transfer > > Signed-off-by: Nishad Saraf <nishads@xxxxxxx> > Signed-off-by: Lizhi Hou <lizhi.hou@xxxxxxx> > --- > MAINTAINERS | 8 + > drivers/dma/Kconfig | 2 + > drivers/dma/Makefile | 1 + > drivers/dma/amd/Kconfig | 14 + > drivers/dma/amd/Makefile | 6 + > drivers/dma/amd/qdma/Makefile | 8 + > drivers/dma/amd/qdma/qdma-comm-regs.c | 64 ++ > drivers/dma/amd/qdma/qdma.c | 1162 ++++++++++++++++++++++++ > drivers/dma/amd/qdma/qdma.h | 265 ++++++ > include/linux/platform_data/amd_qdma.h | 36 + > 10 files changed, 1566 insertions(+) > create mode 100644 drivers/dma/amd/Kconfig > create mode 100644 drivers/dma/amd/Makefile > create mode 100644 drivers/dma/amd/qdma/Makefile > create mode 100644 drivers/dma/amd/qdma/qdma-comm-regs.c > create mode 100644 drivers/dma/amd/qdma/qdma.c > create mode 100644 drivers/dma/amd/qdma/qdma.h > create mode 100644 include/linux/platform_data/amd_qdma.h > > diff --git a/MAINTAINERS b/MAINTAINERS > index 9ed4d3868539..d438214685d9 100644 > --- a/MAINTAINERS > +++ b/MAINTAINERS > @@ -1063,6 +1063,14 @@ L: dmaengine@xxxxxxxxxxxxxxx > S: Maintained > F: drivers/dma/ptdma/ > > +AMD QDMA DRIVER > +M: Nishad Saraf <nishads@xxxxxxx> > +M: Lizhi Hou <lizhi.hou@xxxxxxx> > +L: dmaengine@xxxxxxxxxxxxxxx > +S: Supported > +F: drivers/dma/amd/qdma/ > +F: include/linux/platform_data/amd_qdma.h > + > AMD SEATTLE DEVICE TREE SUPPORT > M: Suravee Suthikulpanit <suravee.suthikulpanit@xxxxxxx> > M: Tom Lendacky <thomas.lendacky@xxxxxxx> > diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig > index e928f2ca0f1e..62d09090b94d 100644 > --- a/drivers/dma/Kconfig > +++ b/drivers/dma/Kconfig > @@ -748,6 +748,8 @@ config XILINX_ZYNQMP_DPDMA > display driver. 
> > # driver files > +source "drivers/dma/amd/Kconfig" > + > source "drivers/dma/bestcomm/Kconfig" > > source "drivers/dma/mediatek/Kconfig" > diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile > index dfd40d14e408..f2c614fcf2a4 100644 > --- a/drivers/dma/Makefile > +++ b/drivers/dma/Makefile > @@ -84,6 +84,7 @@ obj-$(CONFIG_ST_FDMA) += st_fdma.o > obj-$(CONFIG_FSL_DPAA2_QDMA) += fsl-dpaa2-qdma/ > obj-$(CONFIG_INTEL_LDMA) += lgm/ > > +obj-y += amd/ > obj-y += mediatek/ > obj-y += qcom/ > obj-y += ti/ > diff --git a/drivers/dma/amd/Kconfig b/drivers/dma/amd/Kconfig > new file mode 100644 > index 000000000000..7d1f51d69675 > --- /dev/null > +++ b/drivers/dma/amd/Kconfig > @@ -0,0 +1,14 @@ > +# SPDX-License-Identifier: GPL-2.0-only > + > +config AMD_QDMA > + tristate "AMD Queue-based DMA" > + depends on HAS_IOMEM > + select DMA_ENGINE > + select DMA_VIRTUAL_CHANNELS > + select REGMAP_MMIO > + help > + Enable support for the AMD Queue-based DMA subsystem. The primary > + mechanism to transfer data using the QDMA is for the QDMA engine to > + operate on instructions (descriptors) provided by the host operating > + system. Using the descriptors, the QDMA can move data in either the > + Host to Card (H2C) direction or the Card to Host (C2H) direction. > diff --git a/drivers/dma/amd/Makefile b/drivers/dma/amd/Makefile > new file mode 100644 > index 000000000000..99c77de9a9dd > --- /dev/null > +++ b/drivers/dma/amd/Makefile > @@ -0,0 +1,6 @@ > +# SPDX-License-Identifier: GPL-2.0 > +# > +# Copyright (C) 2023, Advanced Micro Devices, Inc. Copyright for Makefile? > +# > + > +obj-$(CONFIG_AMD_QDMA) += qdma/ > diff --git a/drivers/dma/amd/qdma/Makefile b/drivers/dma/amd/qdma/Makefile > new file mode 100644 > index 000000000000..ba53971d2714 > --- /dev/null > +++ b/drivers/dma/amd/qdma/Makefile > @@ -0,0 +1,8 @@ > +# SPDX-License-Identifier: GPL-2.0 > +# > +# Copyright (C) 2023, Advanced Micro Devices, Inc. 
2024 > +# > + > +obj-$(CONFIG_AMD_QDMA) += amd-qdma.o > + > +amd-qdma-$(CONFIG_AMD_QDMA) := qdma.o qdma-comm-regs.o > diff --git a/drivers/dma/amd/qdma/qdma-comm-regs.c b/drivers/dma/amd/qdma/qdma-comm-regs.c > new file mode 100644 > index 000000000000..14c7a73268b4 > --- /dev/null > +++ b/drivers/dma/amd/qdma/qdma-comm-regs.c > @@ -0,0 +1,64 @@ > +// SPDX-License-Identifier: GPL-2.0-or-later > +/* > + * Copyright (C) 2023, Advanced Micro Devices, Inc. > + */ > + > +#ifndef __QDMA_REGS_DEF_H > +#define __QDMA_REGS_DEF_H > + > +#include "qdma.h" > + > +const struct qdma_reg qdma_regos_default[QDMA_REGO_MAX] = { > + [QDMA_REGO_CTXT_DATA] = QDMA_REGO(0x804, 8), > + [QDMA_REGO_CTXT_CMD] = QDMA_REGO(0x844, 1), > + [QDMA_REGO_CTXT_MASK] = QDMA_REGO(0x824, 8), > + [QDMA_REGO_MM_H2C_CTRL] = QDMA_REGO(0x1004, 1), > + [QDMA_REGO_MM_C2H_CTRL] = QDMA_REGO(0x1204, 1), > + [QDMA_REGO_QUEUE_COUNT] = QDMA_REGO(0x120, 1), > + [QDMA_REGO_RING_SIZE] = QDMA_REGO(0x204, 1), > + [QDMA_REGO_H2C_PIDX] = QDMA_REGO(0x18004, 1), > + [QDMA_REGO_C2H_PIDX] = QDMA_REGO(0x18008, 1), > + [QDMA_REGO_INTR_CIDX] = QDMA_REGO(0x18000, 1), > + [QDMA_REGO_FUNC_ID] = QDMA_REGO(0x12c, 1), > + [QDMA_REGO_ERR_INT] = QDMA_REGO(0xb04, 1), > + [QDMA_REGO_ERR_STAT] = QDMA_REGO(0x248, 1), > +}; > + > +const struct qdma_reg_field qdma_regfs_default[QDMA_REGF_MAX] = { > + /* QDMA_REGO_CTXT_DATA fields */ > + [QDMA_REGF_IRQ_ENABLE] = QDMA_REGF(53, 53), > + [QDMA_REGF_WBK_ENABLE] = QDMA_REGF(52, 52), > + [QDMA_REGF_WBI_CHECK] = QDMA_REGF(34, 34), > + [QDMA_REGF_IRQ_ARM] = QDMA_REGF(16, 16), > + [QDMA_REGF_IRQ_VEC] = QDMA_REGF(138, 128), > + [QDMA_REGF_IRQ_AGG] = QDMA_REGF(139, 139), > + [QDMA_REGF_WBI_INTVL_ENABLE] = QDMA_REGF(35, 35), > + [QDMA_REGF_MRKR_DISABLE] = QDMA_REGF(62, 62), > + [QDMA_REGF_QUEUE_ENABLE] = QDMA_REGF(32, 32), > + [QDMA_REGF_QUEUE_MODE] = QDMA_REGF(63, 63), > + [QDMA_REGF_DESC_BASE] = QDMA_REGF(127, 64), > + [QDMA_REGF_DESC_SIZE] = QDMA_REGF(49, 48), > + [QDMA_REGF_RING_ID] = QDMA_REGF(47, 
44), > + [QDMA_REGF_QUEUE_BASE] = QDMA_REGF(11, 0), > + [QDMA_REGF_QUEUE_MAX] = QDMA_REGF(44, 32), > + [QDMA_REGF_FUNCTION_ID] = QDMA_REGF(24, 17), > + [QDMA_REGF_INTR_AGG_BASE] = QDMA_REGF(66, 15), > + [QDMA_REGF_INTR_VECTOR] = QDMA_REGF(11, 1), > + [QDMA_REGF_INTR_SIZE] = QDMA_REGF(69, 67), > + [QDMA_REGF_INTR_VALID] = QDMA_REGF(0, 0), > + [QDMA_REGF_INTR_COLOR] = QDMA_REGF(14, 14), > + [QDMA_REGF_INTR_FUNCTION_ID] = QDMA_REGF(125, 114), > + /* QDMA_REGO_CTXT_CMD fields */ > + [QDMA_REGF_CMD_INDX] = QDMA_REGF(19, 7), > + [QDMA_REGF_CMD_CMD] = QDMA_REGF(6, 5), > + [QDMA_REGF_CMD_TYPE] = QDMA_REGF(4, 1), > + [QDMA_REGF_CMD_BUSY] = QDMA_REGF(0, 0), > + /* QDMA_REGO_QUEUE_COUNT fields */ > + [QDMA_REGF_QUEUE_COUNT] = QDMA_REGF(11, 0), > + /* QDMA_REGO_ERR_INT fields */ > + [QDMA_REGF_ERR_INT_FUNC] = QDMA_REGF(11, 0), > + [QDMA_REGF_ERR_INT_VEC] = QDMA_REGF(22, 12), > + [QDMA_REGF_ERR_INT_ARM] = QDMA_REGF(24, 24), > +}; > + > +#endif /* __QDMA_REGS_DEF_H */ > diff --git a/drivers/dma/amd/qdma/qdma.c b/drivers/dma/amd/qdma/qdma.c > new file mode 100644 > index 000000000000..a1427a72c002 > --- /dev/null > +++ b/drivers/dma/amd/qdma/qdma.c > @@ -0,0 +1,1162 @@ > +// SPDX-License-Identifier: GPL-2.0-or-later > +/* > + * DMA driver for AMD Queue-based DMA Subsystem > + * > + * Copyright (C) 2023, Advanced Micro Devices, Inc. > + */ > +#include <linux/bitfield.h> > +#include <linux/bitops.h> > +#include <linux/dmaengine.h> > +#include <linux/module.h> > +#include <linux/mod_devicetable.h> > +#include <linux/dma-map-ops.h> > +#include <linux/platform_device.h> > +#include <linux/platform_data/amd_qdma.h> > +#include <linux/regmap.h> > + > +#include "qdma.h" > + > +#define CHAN_STR(q) (((q)->dir == DMA_MEM_TO_DEV) ? 
"H2C" : "C2H") > +#define QDMA_REG_OFF(d, r) ((d)->roffs[r].off) > + > +/* MMIO regmap config for all QDMA registers */ > +static const struct regmap_config qdma_regmap_config = { > + .reg_bits = 32, > + .val_bits = 32, > + .reg_stride = 4, > +}; > + > +static inline struct qdma_queue *to_qdma_queue(struct dma_chan *chan) > +{ > + return container_of(chan, struct qdma_queue, vchan.chan); > +} > + > +static inline struct qdma_mm_vdesc *to_qdma_vdesc(struct virt_dma_desc *vdesc) > +{ > + return container_of(vdesc, struct qdma_mm_vdesc, vdesc); > +} > + > +static inline u32 qdma_get_intr_ring_idx(struct qdma_device *qdev) > +{ > + u32 idx; > + > + idx = qdev->qintr_rings[qdev->qintr_ring_idx++].ridx; > + qdev->qintr_ring_idx %= qdev->qintr_ring_num; > + > + return idx; > +} > + > +static u64 qdma_get_field(const struct qdma_device *qdev, const u32 *data, > + enum qdma_reg_fields field) > +{ > + const struct qdma_reg_field *f = &qdev->rfields[field]; > + u16 low_pos, hi_pos, low_bit, hi_bit; > + u64 value = 0, mask; > + > + low_pos = f->lsb / BITS_PER_TYPE(*data); > + hi_pos = f->msb / BITS_PER_TYPE(*data); > + > + if (low_pos == hi_pos) { > + low_bit = f->lsb % BITS_PER_TYPE(*data); > + hi_bit = f->msb % BITS_PER_TYPE(*data); why not use {upper|lower}_xx_bits > + mask = GENMASK(hi_bit, low_bit); > + value = (data[low_pos] & mask) >> low_bit; > + } else if (hi_pos == low_pos + 1) { > + low_bit = f->lsb % BITS_PER_TYPE(*data); > + hi_bit = low_bit + (f->msb - f->lsb); > + value = ((u64)data[hi_pos] << BITS_PER_TYPE(*data)) | > + data[low_pos]; > + mask = GENMASK_ULL(hi_bit, low_bit); > + value = (value & mask) >> low_bit; > + } else { > + hi_bit = f->msb % BITS_PER_TYPE(*data); > + mask = GENMASK(hi_bit, 0); > + value = data[hi_pos] & mask; > + low_bit = f->msb - f->lsb - hi_bit; > + value <<= low_bit; > + low_bit -= 32; > + value |= (u64)data[hi_pos - 1] << low_bit; > + mask = GENMASK(31, 32 - low_bit); > + value |= (data[hi_pos - 2] & mask) >> low_bit; > + } > + > + 
return value; > +} > + > +static void qdma_set_field(const struct qdma_device *qdev, u32 *data, > + enum qdma_reg_fields field, u64 value) > +{ > + const struct qdma_reg_field *f = &qdev->rfields[field]; > + u16 low_pos, hi_pos, low_bit; > + > + low_pos = f->lsb / BITS_PER_TYPE(*data); > + hi_pos = f->msb / BITS_PER_TYPE(*data); > + low_bit = f->lsb % BITS_PER_TYPE(*data); > + > + data[low_pos++] |= value << low_bit; > + if (low_pos <= hi_pos) > + data[low_pos++] |= (u32)(value >> (32 - low_bit)); > + if (low_pos <= hi_pos) > + data[low_pos] |= (u32)(value >> (64 - low_bit)); > +} > + > +static inline int qdma_reg_write(const struct qdma_device *qdev, > + const u32 *data, enum qdma_regs reg) > +{ > + const struct qdma_reg *r = &qdev->roffs[reg]; > + int ret; > + > + if (r->count > 1) > + ret = regmap_bulk_write(qdev->regmap, r->off, data, r->count); > + else > + ret = regmap_write(qdev->regmap, r->off, *data); > + > + return ret; > +} > + > +static inline int qdma_reg_read(const struct qdma_device *qdev, u32 *data, > + enum qdma_regs reg) > +{ > + const struct qdma_reg *r = &qdev->roffs[reg]; > + int ret; > + > + if (r->count > 1) > + ret = regmap_bulk_read(qdev->regmap, r->off, data, r->count); > + else > + ret = regmap_read(qdev->regmap, r->off, data); > + > + return ret; > +} > + > +static int qdma_context_cmd_execute(const struct qdma_device *qdev, > + enum qdma_ctxt_type type, > + enum qdma_ctxt_cmd cmd, u16 index) > +{ > + u32 value = 0; > + int ret; > + > + qdma_set_field(qdev, &value, QDMA_REGF_CMD_INDX, index); > + qdma_set_field(qdev, &value, QDMA_REGF_CMD_CMD, cmd); > + qdma_set_field(qdev, &value, QDMA_REGF_CMD_TYPE, type); > + > + ret = qdma_reg_write(qdev, &value, QDMA_REGO_CTXT_CMD); > + if (ret) > + return ret; > + > + ret = regmap_read_poll_timeout(qdev->regmap, > + QDMA_REG_OFF(qdev, QDMA_REGO_CTXT_CMD), > + value, > + !qdma_get_field(qdev, &value, > + QDMA_REGF_CMD_BUSY), > + QDMA_POLL_INTRVL_US, > + QDMA_POLL_TIMEOUT_US); > + if (ret) { > + 
qdma_err(qdev, "Context command execution timed out"); > + return ret; > + } > + > + return 0; > +} > + > +static int qdma_context_write_data(const struct qdma_device *qdev, > + const u32 *data) > +{ > + u32 mask[QDMA_CTXT_REGMAP_LEN]; > + int ret; > + > + memset(mask, ~0, sizeof(mask)); > + > + ret = qdma_reg_write(qdev, mask, QDMA_REGO_CTXT_MASK); > + if (ret) > + return ret; > + > + ret = qdma_reg_write(qdev, data, QDMA_REGO_CTXT_DATA); > + if (ret) > + return ret; > + > + return 0; > +} > + > +static void qdma_prep_sw_desc_context(const struct qdma_device *qdev, > + const struct qdma_ctxt_sw_desc *ctxt, > + u32 *data) > +{ > + memset(data, 0, QDMA_CTXT_REGMAP_LEN * sizeof(*data)); > + qdma_set_field(qdev, data, QDMA_REGF_DESC_BASE, ctxt->desc_base); > + qdma_set_field(qdev, data, QDMA_REGF_IRQ_VEC, ctxt->vec); > + qdma_set_field(qdev, data, QDMA_REGF_FUNCTION_ID, qdev->fid); > + > + qdma_set_field(qdev, data, QDMA_REGF_DESC_SIZE, QDMA_DESC_SIZE_32B); > + qdma_set_field(qdev, data, QDMA_REGF_RING_ID, QDMA_DEFAULT_RING_ID); > + qdma_set_field(qdev, data, QDMA_REGF_QUEUE_MODE, QDMA_QUEUE_OP_MM); > + qdma_set_field(qdev, data, QDMA_REGF_IRQ_ENABLE, 1); > + qdma_set_field(qdev, data, QDMA_REGF_WBK_ENABLE, 1); > + qdma_set_field(qdev, data, QDMA_REGF_WBI_CHECK, 1); > + qdma_set_field(qdev, data, QDMA_REGF_IRQ_ARM, 1); > + qdma_set_field(qdev, data, QDMA_REGF_IRQ_AGG, 1); > + qdma_set_field(qdev, data, QDMA_REGF_WBI_INTVL_ENABLE, 1); > + qdma_set_field(qdev, data, QDMA_REGF_QUEUE_ENABLE, 1); > + qdma_set_field(qdev, data, QDMA_REGF_MRKR_DISABLE, 1); > +} > + > +static void qdma_prep_intr_context(const struct qdma_device *qdev, > + const struct qdma_ctxt_intr *ctxt, > + u32 *data) > +{ > + memset(data, 0, QDMA_CTXT_REGMAP_LEN * sizeof(*data)); > + qdma_set_field(qdev, data, QDMA_REGF_INTR_AGG_BASE, ctxt->agg_base); > + qdma_set_field(qdev, data, QDMA_REGF_INTR_VECTOR, ctxt->vec); > + qdma_set_field(qdev, data, QDMA_REGF_INTR_SIZE, ctxt->size); > + qdma_set_field(qdev, 
data, QDMA_REGF_INTR_VALID, ctxt->valid); > + qdma_set_field(qdev, data, QDMA_REGF_INTR_COLOR, ctxt->color); > + qdma_set_field(qdev, data, QDMA_REGF_INTR_FUNCTION_ID, qdev->fid); > +} > + > +static void qdma_prep_fmap_context(const struct qdma_device *qdev, > + const struct qdma_ctxt_fmap *ctxt, > + u32 *data) > +{ > + memset(data, 0, QDMA_CTXT_REGMAP_LEN * sizeof(*data)); > + qdma_set_field(qdev, data, QDMA_REGF_QUEUE_BASE, ctxt->qbase); > + qdma_set_field(qdev, data, QDMA_REGF_QUEUE_MAX, ctxt->qmax); > +} > + > +/* > + * Program the indirect context register space > + * > + * Once the queue is enabled, context is dynamically updated by hardware. Any > + * modification of the context through this API when the queue is enabled can > + * result in unexpected behavior. Reading the context when the queue is enabled > + * is not recommended as it can result in reduced performance. > + */ > +static int qdma_prog_context(struct qdma_device *qdev, enum qdma_ctxt_type type, > + enum qdma_ctxt_cmd cmd, u16 index, u32 *ctxt) > +{ > + int ret; > + > + mutex_lock(&qdev->ctxt_lock); > + if (cmd == QDMA_CTXT_WRITE) { > + ret = qdma_context_write_data(qdev, ctxt); > + if (ret) > + goto failed; > + } > + > + ret = qdma_context_cmd_execute(qdev, type, cmd, index); > + if (ret) > + goto failed; > + > + if (cmd == QDMA_CTXT_READ) { > + ret = qdma_reg_read(qdev, ctxt, QDMA_REGO_CTXT_DATA); > + if (ret) > + goto failed; > + } > + > +failed: > + mutex_unlock(&qdev->ctxt_lock); > + > + return ret; > +} > + > +static int qdma_check_queue_status(struct qdma_device *qdev, > + enum dma_transfer_direction dir, u16 qid) > +{ > + u32 status, data[QDMA_CTXT_REGMAP_LEN] = {0}; > + enum qdma_ctxt_type type; > + int ret; > + > + if (dir == DMA_MEM_TO_DEV) > + type = QDMA_CTXT_DESC_SW_H2C; > + else > + type = QDMA_CTXT_DESC_SW_C2H; > + > + ret = qdma_prog_context(qdev, type, QDMA_CTXT_READ, qid, data); > + if (ret) > + return ret; > + > + status = qdma_get_field(qdev, data, QDMA_REGF_QUEUE_ENABLE); 
> + if (status) { > + qdma_err(qdev, "queue %d already in use", qid); > + return -EBUSY; > + } > + > + return 0; > +} > + > +static int qdma_clear_queue_context(const struct qdma_queue *queue) > +{ > + enum qdma_ctxt_type h2c_types[] = { QDMA_CTXT_DESC_SW_H2C, > + QDMA_CTXT_DESC_HW_H2C, > + QDMA_CTXT_DESC_CR_H2C, > + QDMA_CTXT_PFTCH, }; > + enum qdma_ctxt_type c2h_types[] = { QDMA_CTXT_DESC_SW_C2H, > + QDMA_CTXT_DESC_HW_C2H, > + QDMA_CTXT_DESC_CR_C2H, > + QDMA_CTXT_PFTCH, }; > + struct qdma_device *qdev = queue->qdev; > + enum qdma_ctxt_type *type; > + int ret, num, i; > + > + if (queue->dir == DMA_MEM_TO_DEV) { > + type = h2c_types; > + num = ARRAY_SIZE(h2c_types); > + } else { > + type = c2h_types; > + num = ARRAY_SIZE(c2h_types); > + } > + for (i = 0; i < num; i++) { > + ret = qdma_prog_context(qdev, type[i], QDMA_CTXT_CLEAR, > + queue->qid, NULL); > + if (ret) { > + qdma_err(qdev, "Failed to clear ctxt %d", type[i]); > + return ret; > + } > + } > + > + return 0; > +} > + > +static int qdma_setup_fmap_context(struct qdma_device *qdev) > +{ > + u32 ctxt[QDMA_CTXT_REGMAP_LEN]; > + struct qdma_ctxt_fmap fmap; > + int ret; > + > + ret = qdma_prog_context(qdev, QDMA_CTXT_FMAP, QDMA_CTXT_CLEAR, > + qdev->fid, NULL); > + if (ret) { > + qdma_err(qdev, "Failed clearing context"); > + return ret; > + } > + > + fmap.qbase = 0; > + fmap.qmax = qdev->chan_num * 2; > + qdma_prep_fmap_context(qdev, &fmap, ctxt); > + ret = qdma_prog_context(qdev, QDMA_CTXT_FMAP, QDMA_CTXT_WRITE, > + qdev->fid, ctxt); > + if (ret) > + qdma_err(qdev, "Failed setup fmap, ret %d", ret); > + > + return ret; > +} > + > +static int qdma_setup_queue_context(struct qdma_device *qdev, > + const struct qdma_ctxt_sw_desc *sw_desc, > + enum dma_transfer_direction dir, u16 qid) > +{ > + u32 ctxt[QDMA_CTXT_REGMAP_LEN]; > + enum qdma_ctxt_type type; > + int ret; > + > + if (dir == DMA_MEM_TO_DEV) > + type = QDMA_CTXT_DESC_SW_H2C; > + else > + type = QDMA_CTXT_DESC_SW_C2H; > + > + 
qdma_prep_sw_desc_context(qdev, sw_desc, ctxt); > + /* Setup SW descriptor context */ > + ret = qdma_prog_context(qdev, type, QDMA_CTXT_WRITE, qid, ctxt); > + if (ret) > + qdma_err(qdev, "Failed setup SW desc ctxt for queue: %d", qid); > + > + return ret; > +} > + > +/* > + * Enable or disable memory-mapped DMA engines > + * 1: enable, 0: disable > + */ > +static int qdma_sgdma_control(struct qdma_device *qdev, u32 ctrl) > +{ > + int ret; > + > + ret = qdma_reg_write(qdev, &ctrl, QDMA_REGO_MM_H2C_CTRL); > + ret |= qdma_reg_write(qdev, &ctrl, QDMA_REGO_MM_C2H_CTRL); > + > + return ret; > +} > + > +static int qdma_get_hw_info(struct qdma_device *qdev) > +{ > + struct qdma_platdata *pdata = dev_get_platdata(&qdev->pdev->dev); > + u32 value = 0; > + int ret; > + > + ret = qdma_reg_read(qdev, &value, QDMA_REGO_QUEUE_COUNT); > + if (ret) > + return ret; > + > + value = qdma_get_field(qdev, &value, QDMA_REGF_QUEUE_COUNT) + 1; > + if (pdata->max_mm_channels * 2 > value) { > + qdma_err(qdev, "not enough hw queues %d", value); > + return -EINVAL; > + } > + qdev->chan_num = pdata->max_mm_channels; > + > + ret = qdma_reg_read(qdev, &qdev->fid, QDMA_REGO_FUNC_ID); > + if (ret) > + return ret; > + > + qdma_info(qdev, "max channel %d, function id %d", > + qdev->chan_num, qdev->fid); > + > + return 0; > +} > + > +static inline int qdma_update_pidx(const struct qdma_queue *queue, u16 pidx) > +{ > + struct qdma_device *qdev = queue->qdev; > + > + return regmap_write(qdev->regmap, queue->pidx_reg, > + pidx | QDMA_QUEUE_ARM_BIT); > +} > + > +static inline int qdma_update_cidx(const struct qdma_queue *queue, > + u16 ridx, u16 cidx) > +{ > + struct qdma_device *qdev = queue->qdev; > + > + return regmap_write(qdev->regmap, queue->cidx_reg, > + ((u32)ridx << 16) | cidx); > +} > + > +/** > + * qdma_free_vdesc - Free descriptor > + * @vdesc: Virtual DMA descriptor > + */ > +static void qdma_free_vdesc(struct virt_dma_desc *vdesc) > +{ > + struct qdma_mm_vdesc *vd = to_qdma_vdesc(vdesc); > + 
> + kfree(vd); > +} > + > +static int qdma_alloc_queues(struct qdma_device *qdev, > + enum dma_transfer_direction dir) > +{ > + struct qdma_queue *q, **queues; > + u32 i, pidx_base; > + int ret; > + > + if (dir == DMA_MEM_TO_DEV) { > + queues = &qdev->h2c_queues; > + pidx_base = QDMA_REG_OFF(qdev, QDMA_REGO_H2C_PIDX); > + } else { > + queues = &qdev->c2h_queues; > + pidx_base = QDMA_REG_OFF(qdev, QDMA_REGO_C2H_PIDX); > + } > + > + *queues = devm_kcalloc(&qdev->pdev->dev, qdev->chan_num, sizeof(*q), > + GFP_KERNEL); > + if (!*queues) > + return -ENOMEM; > + > + for (i = 0; i < qdev->chan_num; i++) { > + ret = qdma_check_queue_status(qdev, dir, i); > + if (ret) > + return ret; > + > + q = &(*queues)[i]; > + q->ring_size = QDMA_DEFAULT_RING_SIZE; > + q->idx_mask = q->ring_size - 2; > + q->qdev = qdev; > + q->dir = dir; > + q->qid = i; > + q->pidx_reg = pidx_base + i * QDMA_DMAP_REG_STRIDE; > + q->cidx_reg = QDMA_REG_OFF(qdev, QDMA_REGO_INTR_CIDX) + > + i * QDMA_DMAP_REG_STRIDE; > + q->vchan.desc_free = qdma_free_vdesc; > + vchan_init(&q->vchan, &qdev->dma_dev); > + } > + > + return 0; > +} > + > +static int qdma_device_verify(struct qdma_device *qdev) > +{ > + u32 value; > + int ret; > + > + ret = regmap_read(qdev->regmap, QDMA_IDENTIFIER_REGOFF, &value); > + if (ret) > + return ret; > + > + value = FIELD_GET(QDMA_IDENTIFIER_MASK, value); > + if (value != QDMA_IDENTIFIER) { > + qdma_err(qdev, "Invalid identifier"); > + return -ENODEV; > + } > + qdev->rfields = qdma_regfs_default; > + qdev->roffs = qdma_regos_default; > + > + return 0; > +} > + > +static int qdma_device_setup(struct qdma_device *qdev) > +{ > + struct device *dev = &qdev->pdev->dev; > + u32 ring_sz = QDMA_DEFAULT_RING_SIZE; > + int ret = 0; > + > + while (dev && get_dma_ops(dev)) > + dev = dev->parent; > + if (!dev) { > + qdma_err(qdev, "dma device not found"); > + return -EINVAL; > + } > + set_dma_ops(&qdev->pdev->dev, get_dma_ops(dev)); > + > + ret = qdma_setup_fmap_context(qdev); > + if (ret) { > + 
qdma_err(qdev, "Failed setup fmap context"); > + return ret; > + } > + > + /* Setup global ring buffer size at QDMA_DEFAULT_RING_ID index */ > + ret = qdma_reg_write(qdev, &ring_sz, QDMA_REGO_RING_SIZE); > + if (ret) { > + qdma_err(qdev, "Failed to setup ring %d of size %ld", > + QDMA_DEFAULT_RING_ID, QDMA_DEFAULT_RING_SIZE); > + return ret; > + } > + > + /* Enable memory-mapped DMA engine in both directions */ > + ret = qdma_sgdma_control(qdev, 1); > + if (ret) { > + qdma_err(qdev, "Failed to SGDMA with error %d", ret); > + return ret; > + } > + > + ret = qdma_alloc_queues(qdev, DMA_MEM_TO_DEV); > + if (ret) { > + qdma_err(qdev, "Failed to alloc H2C queues, ret %d", ret); > + return ret; > + } > + > + ret = qdma_alloc_queues(qdev, DMA_DEV_TO_MEM); > + if (ret) { > + qdma_err(qdev, "Failed to alloc C2H queues, ret %d", ret); > + return ret; > + } > + > + return 0; > +} > + > +/** > + * qdma_free_queue_resources() - Free queue resources > + * @chan: DMA channel > + */ > +static void qdma_free_queue_resources(struct dma_chan *chan) > +{ > + struct qdma_queue *queue = to_qdma_queue(chan); > + struct qdma_device *qdev = queue->qdev; > + struct device *dev = qdev->dma_dev.dev; > + > + qdma_clear_queue_context(queue); > + vchan_free_chan_resources(&queue->vchan); > + dma_free_coherent(dev, queue->ring_size * QDMA_MM_DESC_SIZE, > + queue->desc_base, queue->dma_desc_base); > +} > + > +/** > + * qdma_alloc_queue_resources() - Allocate queue resources > + * @chan: DMA channel > + */ > +static int qdma_alloc_queue_resources(struct dma_chan *chan) > +{ > + struct qdma_queue *queue = to_qdma_queue(chan); > + struct qdma_device *qdev = queue->qdev; > + struct qdma_ctxt_sw_desc desc; > + size_t size; > + int ret; > + > + ret = qdma_clear_queue_context(queue); > + if (ret) > + return ret; > + > + size = queue->ring_size * QDMA_MM_DESC_SIZE; > + queue->desc_base = dma_alloc_coherent(qdev->dma_dev.dev, size, > + &queue->dma_desc_base, > + GFP_KERNEL); > + if (!queue->desc_base) { > 
+ qdma_err(qdev, "Failed to allocate descriptor ring"); > + return -ENOMEM; > + } > + > + /* Setup SW descriptor queue context for DMA memory map */ > + desc.vec = qdma_get_intr_ring_idx(qdev); > + desc.desc_base = queue->dma_desc_base; > + ret = qdma_setup_queue_context(qdev, &desc, queue->dir, queue->qid); > + if (ret) { > + qdma_err(qdev, "Failed to setup SW desc ctxt for %s", > + chan->name); > + dma_free_coherent(qdev->dma_dev.dev, size, queue->desc_base, > + queue->dma_desc_base); > + return ret; > + } > + > + queue->pidx = 0; > + queue->cidx = 0; > + > + return 0; > +} > + > +static bool qdma_filter_fn(struct dma_chan *chan, void *param) > +{ > + struct qdma_queue *queue = to_qdma_queue(chan); > + struct qdma_queue_info *info = param; > + > + return info->dir == queue->dir; > +} > + > +static int qdma_xfer_start(struct qdma_queue *queue) > +{ > + struct qdma_device *qdev = queue->qdev; > + int ret; > + > + if (!vchan_next_desc(&queue->vchan)) > + return 0; > + > + qdma_dbg(qdev, "Tnx kickoff with P: %d for %s%d", > + queue->issued_vdesc->pidx, CHAN_STR(queue), queue->qid); > + > + ret = qdma_update_pidx(queue, queue->issued_vdesc->pidx); > + if (ret) { > + qdma_err(qdev, "Failed to update PIDX to %d for %s queue: %d", > + queue->pidx, CHAN_STR(queue), queue->qid); > + } > + > + return ret; > +} > + > +static void qdma_issue_pending(struct dma_chan *chan) > +{ > + struct qdma_queue *queue = to_qdma_queue(chan); > + unsigned long flags; > + > + spin_lock_irqsave(&queue->vchan.lock, flags); > + if (vchan_issue_pending(&queue->vchan)) { > + if (queue->submitted_vdesc) { > + queue->issued_vdesc = queue->submitted_vdesc; > + queue->submitted_vdesc = NULL; > + } > + qdma_xfer_start(queue); > + } > + > + spin_unlock_irqrestore(&queue->vchan.lock, flags); > +} > + > +static struct qdma_mm_desc *qdma_get_desc(struct qdma_queue *q) > +{ > + struct qdma_mm_desc *desc; > + > + if (((q->pidx + 1) & q->idx_mask) == q->cidx) > + return NULL; > + > + desc = q->desc_base + 
q->pidx; > + q->pidx = (q->pidx + 1) & q->idx_mask; > + > + return desc; > +} > + > +static int qdma_hw_enqueue(struct qdma_queue *q, struct qdma_mm_vdesc *vdesc) > +{ > + struct qdma_mm_desc *desc; > + struct scatterlist *sg; > + u64 addr, *src, *dst; > + u32 rest, len; > + int ret = 0; > + u32 i; > + > + if (!vdesc->sg_len) > + return 0; > + > + if (q->dir == DMA_MEM_TO_DEV) { > + dst = &vdesc->dev_addr; > + src = &addr; > + } else { > + dst = &addr; > + src = &vdesc->dev_addr; > + } > + > + for_each_sg(vdesc->sgl, sg, vdesc->sg_len, i) { > + addr = sg_dma_address(sg) + vdesc->sg_off; > + rest = sg_dma_len(sg) - vdesc->sg_off; > + while (rest) { > + len = min_t(u32, rest, QDMA_MM_DESC_MAX_LEN); > + desc = qdma_get_desc(q); > + if (!desc) { > + ret = -EBUSY; > + goto out; > + } > + > + desc->src_addr = cpu_to_le64(*src); > + desc->dst_addr = cpu_to_le64(*dst); > + desc->len = cpu_to_le32(len); > + > + vdesc->dev_addr += len; > + vdesc->sg_off += len; > + vdesc->pending_descs++; > + addr += len; > + rest -= len; > + } > + vdesc->sg_off = 0; > + } > +out: > + vdesc->sg_len -= i; > + vdesc->pidx = q->pidx; > + return ret; > +} > + > +static void qdma_fill_pending_vdesc(struct qdma_queue *q) > +{ > + struct virt_dma_chan *vc = &q->vchan; > + struct qdma_mm_vdesc *vdesc = NULL; > + struct virt_dma_desc *vd; > + int ret; > + > + if (!list_empty(&vc->desc_issued)) { > + vd = &q->issued_vdesc->vdesc; > + list_for_each_entry_from(vd, &vc->desc_issued, node) { > + vdesc = to_qdma_vdesc(vd); > + ret = qdma_hw_enqueue(q, vdesc); > + if (ret) { > + q->issued_vdesc = vdesc; > + return; > + } > + } > + q->issued_vdesc = vdesc; > + } > + > + if (list_empty(&vc->desc_submitted)) > + return; > + > + if (q->submitted_vdesc) > + vd = &q->submitted_vdesc->vdesc; > + else > + vd = list_first_entry(&vc->desc_submitted, typeof(*vd), node); > + > + list_for_each_entry_from(vd, &vc->desc_submitted, node) { > + vdesc = to_qdma_vdesc(vd); > + ret = qdma_hw_enqueue(q, vdesc); > + if (ret) > + 
break; > + } > + q->submitted_vdesc = vdesc; > +} > + > +static dma_cookie_t qdma_tx_submit(struct dma_async_tx_descriptor *tx) > +{ > + struct virt_dma_chan *vc = to_virt_chan(tx->chan); > + struct qdma_queue *q = to_qdma_queue(&vc->chan); > + struct virt_dma_desc *vd; > + unsigned long flags; > + dma_cookie_t cookie; > + > + vd = container_of(tx, struct virt_dma_desc, tx); > + spin_lock_irqsave(&vc->lock, flags); > + cookie = dma_cookie_assign(tx); > + > + list_move_tail(&vd->node, &vc->desc_submitted); > + qdma_fill_pending_vdesc(q); > + spin_unlock_irqrestore(&vc->lock, flags); > + > + return cookie; > +} > + > +static void *qdma_get_metadata_ptr(struct dma_async_tx_descriptor *tx, > + size_t *payload_len, size_t *max_len) > +{ > + struct qdma_mm_vdesc *vdesc; > + > + vdesc = container_of(tx, typeof(*vdesc), vdesc.tx); > + if (payload_len) > + *payload_len = sizeof(vdesc->dev_addr); > + if (max_len) > + *max_len = sizeof(vdesc->dev_addr); > + > + return &vdesc->dev_addr; Can you describe what metadata is being used here for? 
> +} > + > +static int qdma_set_metadata_len(struct dma_async_tx_descriptor *tx, > + size_t payload_len) > +{ > + struct qdma_mm_vdesc *vdesc; > + > + vdesc = container_of(tx, typeof(*vdesc), vdesc.tx); > + if (payload_len != sizeof(vdesc->dev_addr)) > + return -EINVAL; > + > + return 0; > +} > + > +static struct dma_descriptor_metadata_ops metadata_ops = { > + .get_ptr = qdma_get_metadata_ptr, > + .set_len = qdma_set_metadata_len, > +}; > + > +static struct dma_async_tx_descriptor * > +qdma_prep_device_sg(struct dma_chan *chan, struct scatterlist *sgl, > + unsigned int sg_len, enum dma_transfer_direction dir, > + unsigned long flags, void *context) > +{ > + struct qdma_queue *q = to_qdma_queue(chan); > + struct dma_async_tx_descriptor *tx; > + struct qdma_mm_vdesc *vdesc; > + > + vdesc = kzalloc(sizeof(*vdesc), GFP_NOWAIT); > + if (!vdesc) > + return NULL; > + vdesc->sgl = sgl; > + vdesc->sg_len = sg_len; > + > + tx = vchan_tx_prep(&q->vchan, &vdesc->vdesc, flags); > + tx->tx_submit = qdma_tx_submit; > + tx->metadata_ops = &metadata_ops; > + > + return tx; > +} > + > +static int qdma_arm_err_intr(const struct qdma_device *qdev) > +{ > + u32 value = 0; > + > + qdma_set_field(qdev, &value, QDMA_REGF_ERR_INT_FUNC, qdev->fid); > + qdma_set_field(qdev, &value, QDMA_REGF_ERR_INT_VEC, qdev->err_irq_idx); > + qdma_set_field(qdev, &value, QDMA_REGF_ERR_INT_ARM, 1); > + > + return qdma_reg_write(qdev, &value, QDMA_REGO_ERR_INT); > +} > + > +static irqreturn_t qdma_error_isr(int irq, void *data) > +{ > + struct qdma_device *qdev = data; > + u32 err_stat = 0; > + int ret; > + > + ret = qdma_reg_read(qdev, &err_stat, QDMA_REGO_ERR_STAT); > + if (ret) { > + qdma_err(qdev, "read error state failed, ret %d", ret); > + goto out; > + } > + > + qdma_err(qdev, "global error %d", err_stat); > + ret = qdma_reg_write(qdev, &err_stat, QDMA_REGO_ERR_STAT); > + if (ret) > + qdma_err(qdev, "clear error state failed, ret %d", ret); > + > +out: > + qdma_arm_err_intr(qdev); > + return 
IRQ_HANDLED; > +} > + > +static irqreturn_t qdma_queue_isr(int irq, void *data) > +{ > + struct qdma_intr_ring *intr = data; > + struct qdma_queue *q = NULL; > + struct qdma_device *qdev; > + u32 index, comp_desc; > + u64 intr_ent; > + u8 color; > + int ret; > + u16 qid; > + > + qdev = intr->qdev; > + index = intr->cidx; > + while (1) { > + struct virt_dma_desc *vd; > + struct qdma_mm_vdesc *vdesc; > + unsigned long flags; > + u32 cidx; > + > + intr_ent = le64_to_cpu(intr->base[index]); > + color = FIELD_GET(QDMA_INTR_MASK_COLOR, intr_ent); > + if (color != intr->color) > + break; > + > + qid = FIELD_GET(QDMA_INTR_MASK_QID, intr_ent); > + if (FIELD_GET(QDMA_INTR_MASK_TYPE, intr_ent)) > + q = qdev->c2h_queues; > + else > + q = qdev->h2c_queues; > + q += qid; > + > + cidx = FIELD_GET(QDMA_INTR_MASK_CIDX, intr_ent); > + > + spin_lock_irqsave(&q->vchan.lock, flags); > + comp_desc = (cidx - q->cidx) & q->idx_mask; > + > + vd = vchan_next_desc(&q->vchan); > + if (!vd) > + goto skip; > + > + vdesc = to_qdma_vdesc(vd); > + while (comp_desc > vdesc->pending_descs) { > + list_del(&vd->node); > + vchan_cookie_complete(vd); > + comp_desc -= vdesc->pending_descs; > + vd = vchan_next_desc(&q->vchan); > + vdesc = to_qdma_vdesc(vd); > + } > + vdesc->pending_descs -= comp_desc; > + if (!vdesc->pending_descs && QDMA_VDESC_QUEUED(vdesc)) { > + list_del(&vd->node); > + vchan_cookie_complete(vd); > + } > + q->cidx = cidx; > + > + qdma_fill_pending_vdesc(q); > + qdma_xfer_start(q); > + > +skip: > + spin_unlock_irqrestore(&q->vchan.lock, flags); > + > + /* > + * Wrap the index value and flip the expected color value if > + * interrupt aggregation PIDX has wrapped around. > + */ > + index++; > + index &= QDMA_INTR_RING_IDX_MASK; > + if (!index) > + intr->color = !intr->color; > + } > + > + /* > + * Update the software interrupt aggregation ring CIDX if a valid entry > + * was found. 
> + */ > + if (q) { > + qdma_dbg(qdev, "update intr ring%d %d", intr->ridx, index); > + > + /* > + * Record the last read index of status descriptor from the > + * interrupt aggregation ring. > + */ > + intr->cidx = index; > + > + ret = qdma_update_cidx(q, intr->ridx, index); > + if (ret) { > + qdma_err(qdev, "Failed to update IRQ CIDX"); > + return IRQ_NONE; > + } > + } > + > + return IRQ_HANDLED; > +} > + > +static int qdma_init_error_irq(struct qdma_device *qdev) > +{ > + struct device *dev = &qdev->pdev->dev; > + int ret; > + u32 vec; > + > + vec = qdev->queue_irq_start - 1; > + > + ret = devm_request_threaded_irq(dev, vec, NULL, qdma_error_isr, > + IRQF_ONESHOT, "amd-qdma-error", qdev); > + if (ret) { > + qdma_err(qdev, "Failed to request error IRQ vector: %d", vec); > + return ret; > + } > + > + ret = qdma_arm_err_intr(qdev); > + if (ret) > + qdma_err(qdev, "Failed to arm err interrupt, ret %d", ret); > + > + return ret; > +} > + > +static int qdmam_alloc_qintr_rings(struct qdma_device *qdev) > +{ > + u32 ctxt[QDMA_CTXT_REGMAP_LEN]; > + struct device *dev = &qdev->pdev->dev; > + struct qdma_intr_ring *ring; > + struct qdma_ctxt_intr intr_ctxt; > + u32 vector; > + int ret, i; > + > + qdev->qintr_ring_num = qdev->queue_irq_num; > + qdev->qintr_rings = devm_kcalloc(dev, qdev->qintr_ring_num, > + sizeof(*qdev->qintr_rings), > + GFP_KERNEL); > + if (!qdev->qintr_rings) > + return -ENOMEM; > + > + vector = qdev->queue_irq_start; > + for (i = 0; i < qdev->qintr_ring_num; i++, vector++) { > + ring = &qdev->qintr_rings[i]; > + ring->qdev = qdev; > + ring->msix_id = qdev->err_irq_idx + i + 1; > + ring->ridx = i; > + ring->color = 1; > + ring->base = dmam_alloc_coherent(dev, QDMA_INTR_RING_SIZE, > + &ring->dev_base, GFP_KERNEL); > + if (!ring->base) { > + qdma_err(qdev, "Failed to alloc intr ring %d", i); > + return -ENOMEM; > + } > + intr_ctxt.agg_base = QDMA_INTR_RING_BASE(ring->dev_base); > + intr_ctxt.size = (QDMA_INTR_RING_SIZE - 1) / 4096; > + intr_ctxt.vec = 
ring->msix_id; > + intr_ctxt.valid = true; > + intr_ctxt.color = true; > + ret = qdma_prog_context(qdev, QDMA_CTXT_INTR_COAL, > + QDMA_CTXT_CLEAR, ring->ridx, NULL); > + if (ret) { > + qdma_err(qdev, "Failed clear intr ctx, ret %d", ret); > + return ret; > + } > + > + qdma_prep_intr_context(qdev, &intr_ctxt, ctxt); > + ret = qdma_prog_context(qdev, QDMA_CTXT_INTR_COAL, > + QDMA_CTXT_WRITE, ring->ridx, ctxt); > + if (ret) { > + qdma_err(qdev, "Failed setup intr ctx, ret %d", ret); > + return ret; > + } > + > + ret = devm_request_threaded_irq(dev, vector, NULL, > + qdma_queue_isr, IRQF_ONESHOT, > + "amd-qdma-queue", ring); > + if (ret) { > + qdma_err(qdev, "Failed to request irq %d", vector); > + return ret; > + } > + } > + > + return 0; > +} > + > +static int qdma_intr_init(struct qdma_device *qdev) > +{ > + int ret; > + > + ret = qdma_init_error_irq(qdev); > + if (ret) { > + qdma_err(qdev, "Failed to init error IRQs, ret %d", ret); > + return ret; > + } > + > + ret = qdmam_alloc_qintr_rings(qdev); > + if (ret) { > + qdma_err(qdev, "Failed to init queue IRQs, ret %d", ret); > + return ret; > + } > + > + return 0; > +} > + > +static void amd_qdma_remove(struct platform_device *pdev) > +{ > + struct qdma_device *qdev = platform_get_drvdata(pdev); > + > + qdma_sgdma_control(qdev, 0); > + dma_async_device_unregister(&qdev->dma_dev); > + > + mutex_destroy(&qdev->ctxt_lock); > +} > + > +static int amd_qdma_probe(struct platform_device *pdev) > +{ > + struct qdma_platdata *pdata = dev_get_platdata(&pdev->dev); > + struct qdma_device *qdev; > + struct resource *res; > + void __iomem *regs; > + int ret; > + > + qdev = devm_kzalloc(&pdev->dev, sizeof(*qdev), GFP_KERNEL); > + if (!qdev) > + return -ENOMEM; > + > + platform_set_drvdata(pdev, qdev); > + qdev->pdev = pdev; > + mutex_init(&qdev->ctxt_lock); > + > + res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); > + if (!res) { > + qdma_err(qdev, "Failed to get IRQ resource"); > + ret = -ENODEV; > + goto failed; > + } > + 
qdev->err_irq_idx = pdata->irq_index; > + qdev->queue_irq_start = res->start + 1; > + qdev->queue_irq_num = resource_size(res) - 1; > + > + regs = devm_platform_get_and_ioremap_resource(pdev, 0, NULL); > + if (IS_ERR(regs)) { > + ret = PTR_ERR(regs); > + qdma_err(qdev, "Failed to map IO resource, err %d", ret); > + goto failed; > + } > + > + qdev->regmap = devm_regmap_init_mmio(&pdev->dev, regs, > + &qdma_regmap_config); > + if (IS_ERR(qdev->regmap)) { > + ret = PTR_ERR(qdev->regmap); > + qdma_err(qdev, "Regmap init failed, err %d", ret); > + goto failed; > + } > + > + ret = qdma_device_verify(qdev); > + if (ret) > + goto failed; > + > + ret = qdma_get_hw_info(qdev); > + if (ret) > + goto failed; > + > + INIT_LIST_HEAD(&qdev->dma_dev.channels); > + > + ret = qdma_device_setup(qdev); > + if (ret) > + goto failed; > + > + ret = qdma_intr_init(qdev); > + if (ret) { > + qdma_err(qdev, "Failed to initialize IRQs %d", ret); > + goto failed_disable_engine; > + } > + > + dma_cap_set(DMA_SLAVE, qdev->dma_dev.cap_mask); > + dma_cap_set(DMA_PRIVATE, qdev->dma_dev.cap_mask); > + > + qdev->dma_dev.dev = &pdev->dev; > + qdev->dma_dev.filter.map = pdata->device_map; > + qdev->dma_dev.filter.mapcnt = qdev->chan_num * 2; > + qdev->dma_dev.filter.fn = qdma_filter_fn; > + qdev->dma_dev.desc_metadata_modes = DESC_METADATA_ENGINE; > + qdev->dma_dev.device_alloc_chan_resources = qdma_alloc_queue_resources; > + qdev->dma_dev.device_free_chan_resources = qdma_free_queue_resources; > + qdev->dma_dev.device_prep_slave_sg = qdma_prep_device_sg; > + qdev->dma_dev.device_issue_pending = qdma_issue_pending; > + qdev->dma_dev.device_tx_status = dma_cookie_status; > + qdev->dma_dev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); > + > + ret = dma_async_device_register(&qdev->dma_dev); > + if (ret) { > + qdma_err(qdev, "Failed to register AMD QDMA: %d", ret); > + goto failed_disable_engine; > + } > + > + return 0; > + > +failed_disable_engine: > + qdma_sgdma_control(qdev, 0); > +failed: > 
+ mutex_destroy(&qdev->ctxt_lock); > + qdma_err(qdev, "Failed to probe AMD QDMA driver"); > + return ret; > +} > + > +static struct platform_driver amd_qdma_driver = { > + .driver = { > + .name = "amd-qdma", > + }, > + .probe = amd_qdma_probe, > + .remove_new = amd_qdma_remove, > +}; > + > +module_platform_driver(amd_qdma_driver); > + > +MODULE_DESCRIPTION("AMD QDMA driver"); > +MODULE_AUTHOR("XRT Team <runtimeca39d@xxxxxxx>"); > +MODULE_LICENSE("GPL"); > diff --git a/drivers/dma/amd/qdma/qdma.h b/drivers/dma/amd/qdma/qdma.h > new file mode 100644 > index 000000000000..268fbad9f0a2 > --- /dev/null > +++ b/drivers/dma/amd/qdma/qdma.h > @@ -0,0 +1,265 @@ > +/* SPDX-License-Identifier: GPL-2.0-or-later */ > +/* > + * DMA header for AMD Queue-based DMA Subsystem > + * > + * Copyright (C) 2023, Advanced Micro Devices, Inc. > + */ > + > +#ifndef __QDMA_H > +#define __QDMA_H > + > +#include <linux/bitfield.h> > +#include <linux/dmaengine.h> > +#include <linux/kernel.h> > +#include <linux/platform_device.h> > +#include <linux/regmap.h> > + > +#include "../../virt-dma.h" > + > +#define DISABLE 0 > +#define ENABLE 1 > + > +#define QDMA_MIN_IRQ 3 > +#define QDMA_INTR_NAME_MAX_LEN 30 > +#define QDMA_INTR_PREFIX "amd-qdma" > + > +#define QDMA_IDENTIFIER 0x1FD3 > +#define QDMA_DEFAULT_RING_SIZE (BIT(10) + 1) > +#define QDMA_DEFAULT_RING_ID 0 > +#define QDMA_POLL_INTRVL_US 10 /* 10us */ > +#define QDMA_POLL_TIMEOUT_US (500 * 1000) /* 500ms */ > +#define QDMA_DMAP_REG_STRIDE 16 > +#define QDMA_CTXT_REGMAP_LEN 8 /* 8 regs */ > +#define QDMA_MM_DESC_SIZE 32 /* Bytes */ > +#define QDMA_MM_DESC_LEN_BITS 28 > +#define QDMA_MM_DESC_MAX_LEN (BIT(QDMA_MM_DESC_LEN_BITS) - 1) > +#define QDMA_MIN_DMA_ALLOC_SIZE 4096 > +#define QDMA_INTR_RING_SIZE BIT(13) > +#define QDMA_INTR_RING_IDX_MASK GENMASK(9, 0) > +#define QDMA_INTR_RING_BASE(_addr) ((_addr) >> 12) > + > +#define QDMA_IDENTIFIER_REGOFF 0x0 > +#define QDMA_IDENTIFIER_MASK GENMASK(31, 16) > +#define QDMA_QUEUE_ARM_BIT BIT(16) > + > 
+#define qdma_err(qdev, fmt, args...) \ > + dev_err(&(qdev)->pdev->dev, fmt, ##args) > + > +#define qdma_dbg(qdev, fmt, args...) \ > + dev_dbg(&(qdev)->pdev->dev, fmt, ##args) > + > +#define qdma_info(qdev, fmt, args...) \ > + dev_info(&(qdev)->pdev->dev, fmt, ##args) > + > +enum qdma_reg_fields { > + QDMA_REGF_IRQ_ENABLE, > + QDMA_REGF_WBK_ENABLE, > + QDMA_REGF_WBI_CHECK, > + QDMA_REGF_IRQ_ARM, > + QDMA_REGF_IRQ_VEC, > + QDMA_REGF_IRQ_AGG, > + QDMA_REGF_WBI_INTVL_ENABLE, > + QDMA_REGF_MRKR_DISABLE, > + QDMA_REGF_QUEUE_ENABLE, > + QDMA_REGF_QUEUE_MODE, > + QDMA_REGF_DESC_BASE, > + QDMA_REGF_DESC_SIZE, > + QDMA_REGF_RING_ID, > + QDMA_REGF_CMD_INDX, > + QDMA_REGF_CMD_CMD, > + QDMA_REGF_CMD_TYPE, > + QDMA_REGF_CMD_BUSY, > + QDMA_REGF_QUEUE_COUNT, > + QDMA_REGF_QUEUE_MAX, > + QDMA_REGF_QUEUE_BASE, > + QDMA_REGF_FUNCTION_ID, > + QDMA_REGF_INTR_AGG_BASE, > + QDMA_REGF_INTR_VECTOR, > + QDMA_REGF_INTR_SIZE, > + QDMA_REGF_INTR_VALID, > + QDMA_REGF_INTR_COLOR, > + QDMA_REGF_INTR_FUNCTION_ID, > + QDMA_REGF_ERR_INT_FUNC, > + QDMA_REGF_ERR_INT_VEC, > + QDMA_REGF_ERR_INT_ARM, > + QDMA_REGF_MAX > +}; > + > +enum qdma_regs { > + QDMA_REGO_CTXT_DATA, > + QDMA_REGO_CTXT_CMD, > + QDMA_REGO_CTXT_MASK, > + QDMA_REGO_MM_H2C_CTRL, > + QDMA_REGO_MM_C2H_CTRL, > + QDMA_REGO_QUEUE_COUNT, > + QDMA_REGO_RING_SIZE, > + QDMA_REGO_H2C_PIDX, > + QDMA_REGO_C2H_PIDX, > + QDMA_REGO_INTR_CIDX, > + QDMA_REGO_FUNC_ID, > + QDMA_REGO_ERR_INT, > + QDMA_REGO_ERR_STAT, > + QDMA_REGO_MAX > +}; > + > +struct qdma_reg_field { > + u16 lsb; /* Least significant bit of field */ > + u16 msb; /* Most significant bit of field */ > +}; > + > +struct qdma_reg { > + u32 off; > + u32 count; > +}; > + > +#define QDMA_REGF(_msb, _lsb) { \ > + .lsb = (_lsb), \ > + .msb = (_msb), \ > +} > + > +#define QDMA_REGO(_off, _count) { \ > + .off = (_off), \ > + .count = (_count), \ > +} > + > +enum qdma_desc_size { > + QDMA_DESC_SIZE_8B, > + QDMA_DESC_SIZE_16B, > + QDMA_DESC_SIZE_32B, > + QDMA_DESC_SIZE_64B, > +}; > + > +enum 
qdma_queue_op_mode { > + QDMA_QUEUE_OP_STREAM, > + QDMA_QUEUE_OP_MM, > +}; > + > +enum qdma_ctxt_type { > + QDMA_CTXT_DESC_SW_C2H, > + QDMA_CTXT_DESC_SW_H2C, > + QDMA_CTXT_DESC_HW_C2H, > + QDMA_CTXT_DESC_HW_H2C, > + QDMA_CTXT_DESC_CR_C2H, > + QDMA_CTXT_DESC_CR_H2C, > + QDMA_CTXT_WRB, > + QDMA_CTXT_PFTCH, > + QDMA_CTXT_INTR_COAL, > + QDMA_CTXT_RSVD, > + QDMA_CTXT_HOST_PROFILE, > + QDMA_CTXT_TIMER, > + QDMA_CTXT_FMAP, > + QDMA_CTXT_FNC_STS, > +}; > + > +enum qdma_ctxt_cmd { > + QDMA_CTXT_CLEAR, > + QDMA_CTXT_WRITE, > + QDMA_CTXT_READ, > + QDMA_CTXT_INVALIDATE, > + QDMA_CTXT_MAX > +}; > + > +struct qdma_ctxt_sw_desc { > + u64 desc_base; > + u16 vec; > +}; > + > +struct qdma_ctxt_intr { > + u64 agg_base; > + u16 vec; > + u32 size; > + bool valid; > + bool color; > +}; > + > +struct qdma_ctxt_fmap { > + u16 qbase; > + u16 qmax; > +}; > + > +struct qdma_device; > + > +struct qdma_mm_desc { > + __le64 src_addr; > + __le32 len; > + __le32 reserved1; > + __le64 dst_addr; > + __le64 reserved2; > +} __packed; > + > +struct qdma_mm_vdesc { > + struct virt_dma_desc vdesc; > + struct qdma_queue *queue; > + struct scatterlist *sgl; > + u64 sg_off; > + u32 sg_len; > + u64 dev_addr; > + u32 pidx; > + u32 pending_descs; > +}; > + > +#define QDMA_VDESC_QUEUED(vdesc) (!(vdesc)->sg_len) > + > +struct qdma_queue { > + struct qdma_device *qdev; > + struct virt_dma_chan vchan; > + enum dma_transfer_direction dir; > + struct dma_slave_config cfg; > + struct qdma_mm_desc *desc_base; > + struct qdma_mm_vdesc *submitted_vdesc; > + struct qdma_mm_vdesc *issued_vdesc; > + dma_addr_t dma_desc_base; > + u32 pidx_reg; > + u32 cidx_reg; > + u32 ring_size; > + u32 idx_mask; > + u16 qid; > + u32 pidx; > + u32 cidx; > +}; > + > +struct qdma_intr_ring { > + struct qdma_device *qdev; > + __le64 *base; > + dma_addr_t dev_base; > + char msix_name[QDMA_INTR_NAME_MAX_LEN]; > + u32 msix_vector; > + u16 msix_id; > + u32 ring_size; > + u16 ridx; > + u16 cidx; > + u8 color; > +}; > + > +#define 
QDMA_INTR_MASK_PIDX GENMASK_ULL(15, 0) > +#define QDMA_INTR_MASK_CIDX GENMASK_ULL(31, 16) > +#define QDMA_INTR_MASK_DESC_COLOR GENMASK_ULL(32, 32) > +#define QDMA_INTR_MASK_STATE GENMASK_ULL(34, 33) > +#define QDMA_INTR_MASK_ERROR GENMASK_ULL(36, 35) > +#define QDMA_INTR_MASK_TYPE GENMASK_ULL(38, 38) > +#define QDMA_INTR_MASK_QID GENMASK_ULL(62, 39) > +#define QDMA_INTR_MASK_COLOR GENMASK_ULL(63, 63) > + > +struct qdma_device { > + struct platform_device *pdev; > + struct dma_device dma_dev; > + struct regmap *regmap; > + struct mutex ctxt_lock; /* protect ctxt registers */ > + const struct qdma_reg_field *rfields; > + const struct qdma_reg *roffs; > + struct qdma_queue *h2c_queues; > + struct qdma_queue *c2h_queues; > + struct qdma_intr_ring *qintr_rings; > + u32 qintr_ring_num; > + u32 qintr_ring_idx; > + u32 chan_num; > + u32 queue_irq_start; > + u32 queue_irq_num; > + u32 err_irq_idx; > + u32 fid; > +}; > + > +extern const struct qdma_reg qdma_regos_default[QDMA_REGO_MAX]; > +extern const struct qdma_reg_field qdma_regfs_default[QDMA_REGF_MAX]; > + > +#endif /* __QDMA_H */ > diff --git a/include/linux/platform_data/amd_qdma.h b/include/linux/platform_data/amd_qdma.h > new file mode 100644 > index 000000000000..59fa0c174f70 > --- /dev/null > +++ b/include/linux/platform_data/amd_qdma.h > @@ -0,0 +1,36 @@ > +/* SPDX-License-Identifier: GPL-2.0-or-later */ > +/* > + * Copyright (C) 2023, Advanced Micro Devices, Inc. > + */ > + > +#ifndef _PLATDATA_AMD_QDMA_H > +#define _PLATDATA_AMD_QDMA_H > + > +#include <linux/dmaengine.h> > + > +/** > + * struct qdma_queue_info - DMA queue information. This information is used to > + * match queue when DMA channel is requested > + * @dir: Channel transfer direction > + */ > +struct qdma_queue_info { > + enum dma_transfer_direction dir; > +}; Why is dma_transfer_direction here? Why not use the prep_ calls for passing that around?
> + > +#define QDMA_FILTER_PARAM(qinfo) ((void *)(qinfo)) > + > +struct dma_slave_map; > + > +/** > + * struct qdma_platdata - Platform specific data for QDMA engine > + * @max_mm_channels: Maximum number of MM DMA channels in each direction > + * @device_map: DMA slave map > + * @irq_index: The index of first IRQ > + */ > +struct qdma_platdata { > + u32 max_mm_channels; > + u32 irq_index; > + struct dma_slave_map *device_map; > +}; Why should this be platform data? -- ~Vinod