On 6/19/2024 12:59 PM, Philipp Stanner wrote: > On Mon, 2024-06-17 at 15:33 +0530, Basavaraj Natikar wrote: >> Use the pt_dmaengine_register function to register a AE4DMA DMA >> engine. >> >> Reviewed-by: Raju Rangoju <Raju.Rangoju@xxxxxxx> >> Signed-off-by: Basavaraj Natikar <Basavaraj.Natikar@xxxxxxx> >> --- >> drivers/dma/amd/ae4dma/Makefile | 2 +- >> drivers/dma/amd/ae4dma/ae4dma-dev.c | 73 >> +++++++++++++++++++++++++++++ >> drivers/dma/amd/ae4dma/ae4dma-pci.c | 1 + >> drivers/dma/amd/ae4dma/ae4dma.h | 2 + >> 4 files changed, 77 insertions(+), 1 deletion(-) >> >> diff --git a/drivers/dma/amd/ae4dma/Makefile >> b/drivers/dma/amd/ae4dma/Makefile >> index e918f85a80ec..165d1c74b732 100644 >> --- a/drivers/dma/amd/ae4dma/Makefile >> +++ b/drivers/dma/amd/ae4dma/Makefile >> @@ -5,6 +5,6 @@ >> >> obj-$(CONFIG_AMD_AE4DMA) += ae4dma.o >> >> -ae4dma-objs := ae4dma-dev.o >> +ae4dma-objs := ae4dma-dev.o ../ptdma/ptdma-dmaengine.o >> ../common/amd_dma.o >> >> ae4dma-$(CONFIG_PCI) += ae4dma-pci.o >> diff --git a/drivers/dma/amd/ae4dma/ae4dma-dev.c >> b/drivers/dma/amd/ae4dma/ae4dma-dev.c >> index 958bdab8db59..77c37649d8d1 100644 >> --- a/drivers/dma/amd/ae4dma/ae4dma-dev.c >> +++ b/drivers/dma/amd/ae4dma/ae4dma-dev.c >> @@ -60,6 +60,15 @@ static void ae4_check_status_error(struct >> ae4_cmd_queue *ae4cmd_q, int idx) >> } >> } >> >> +void pt_check_status_trans(struct pt_device *pt, struct pt_cmd_queue >> *cmd_q) >> +{ >> + struct ae4_cmd_queue *ae4cmd_q = container_of(cmd_q, struct >> ae4_cmd_queue, cmd_q); >> + int i; >> + >> + for (i = 0; i < CMD_Q_LEN; i++) >> + ae4_check_status_error(ae4cmd_q, i); >> +} >> + >> static void ae4_pending_work(struct work_struct *work) >> { >> struct ae4_cmd_queue *ae4cmd_q = container_of(work, struct >> ae4_cmd_queue, p_work.work); >> @@ -123,6 +132,66 @@ static irqreturn_t ae4_core_irq_handler(int irq, >> void *data) >> return IRQ_HANDLED; >> } >> >> +static int ae4_core_execute_cmd(struct ae4dma_desc *desc, struct >> ae4_cmd_queue 
*ae4cmd_q) >> +{ >> + bool soc = FIELD_GET(DWORD0_SOC, desc->dwouv.dw0); >> + struct pt_cmd_queue *cmd_q = &ae4cmd_q->cmd_q; >> + u32 tail_wi; >> + >> + if (soc) { >> + desc->dwouv.dw0 |= FIELD_PREP(DWORD0_IOC, desc- >>> dwouv.dw0); >> + desc->dwouv.dw0 &= ~DWORD0_SOC; >> + } >> + >> + mutex_lock(&ae4cmd_q->cmd_lock); >> + >> + tail_wi = atomic_read(&ae4cmd_q->tail_wi); >> + memcpy(&cmd_q->qbase[tail_wi], desc, sizeof(struct >> ae4dma_desc)); >> + >> + atomic64_inc(&ae4cmd_q->q_cmd_count); >> + >> + tail_wi = (tail_wi + 1) % CMD_Q_LEN; >> + >> + atomic_set(&ae4cmd_q->tail_wi, tail_wi); >> + /* Synchronize ordering */ >> + mb(); >> + >> + writel(tail_wi, cmd_q->reg_control + 0x10); >> + /* Synchronize ordering */ >> + mb(); > Same here as in patch №2, I think writel() and mutex can't change their > relative order. > >> + >> + mutex_unlock(&ae4cmd_q->cmd_lock); > Same question: can't everything be done by the mutex alone? Sure, I will remove it in all applicable places. Thanks, -- Basavaraj > > > P. 
> >> + >> + wake_up(&ae4cmd_q->q_w); >> + >> + return 0; >> +} >> + >> +int pt_core_perform_passthru(struct pt_cmd_queue *cmd_q, >> + struct pt_passthru_engine *pt_engine) >> +{ >> + struct ae4_cmd_queue *ae4cmd_q = container_of(cmd_q, struct >> ae4_cmd_queue, cmd_q); >> + struct ae4dma_desc desc; >> + >> + cmd_q->cmd_error = 0; >> + cmd_q->total_pt_ops++; >> + memset(&desc, 0, sizeof(desc)); >> + desc.dwouv.dws.byte0 = CMD_AE4_DESC_DW0_VAL; >> + >> + desc.dw1.status = 0; >> + desc.dw1.err_code = 0; >> + desc.dw1.desc_id = 0; >> + >> + desc.length = pt_engine->src_len; >> + >> + desc.src_lo = upper_32_bits(pt_engine->src_dma); >> + desc.src_hi = lower_32_bits(pt_engine->src_dma); >> + desc.dst_lo = upper_32_bits(pt_engine->dst_dma); >> + desc.dst_hi = lower_32_bits(pt_engine->dst_dma); >> + >> + return ae4_core_execute_cmd(&desc, ae4cmd_q); >> +} >> + >> void ae4_destroy_work(struct ae4_device *ae4) >> { >> struct ae4_cmd_queue *ae4cmd_q; >> @@ -202,5 +271,9 @@ int ae4_core_init(struct ae4_device *ae4) >> init_completion(&ae4cmd_q->cmp); >> } >> >> + ret = pt_dmaengine_register(pt); >> + if (ret) >> + ae4_destroy_work(ae4); >> + >> return ret; >> } >> diff --git a/drivers/dma/amd/ae4dma/ae4dma-pci.c >> b/drivers/dma/amd/ae4dma/ae4dma-pci.c >> index ddebf0609c4d..5450fa551eea 100644 >> --- a/drivers/dma/amd/ae4dma/ae4dma-pci.c >> +++ b/drivers/dma/amd/ae4dma/ae4dma-pci.c >> @@ -131,6 +131,7 @@ static int ae4_pci_probe(struct pci_dev *pdev, >> const struct pci_device_id *id) >> >> pt = &ae4->pt; >> pt->dev = dev; >> + pt->ver = AE4_DMA_VERSION; >> >> pt->io_regs = pcim_iomap_table(pdev)[0]; >> if (!pt->io_regs) { >> diff --git a/drivers/dma/amd/ae4dma/ae4dma.h >> b/drivers/dma/amd/ae4dma/ae4dma.h >> index 4e4584e152a1..f1b6dcc1d8c3 100644 >> --- a/drivers/dma/amd/ae4dma/ae4dma.h >> +++ b/drivers/dma/amd/ae4dma/ae4dma.h >> @@ -16,6 +16,7 @@ >> >> #define AE4_DESC_COMPLETED 0x3 >> #define AE4_DMA_VERSION 4 >> +#define CMD_AE4_DESC_DW0_VAL 2 >> >> struct ae4_msix { >> 
int msix_count; >> @@ -36,6 +37,7 @@ struct ae4_cmd_queue { >> atomic64_t done_cnt; >> atomic64_t q_cmd_count; >> atomic_t dridx; >> + atomic_t tail_wi; >> unsigned int id; >> }; >>