Add plumbing for dmaengine subsystem connection. The driver registers a DMA device per DSA device. The channels are dynamically registered when a workqueue is configured to be "kernel:dmaengine" type. The driver will utilize the newly introduced DMA request API calls to provide a lockless descriptor submission path. Signed-off-by: Dave Jiang <dave.jiang@xxxxxxxxx> --- drivers/dma/Kconfig | 1 drivers/dma/idxd/Makefile | 2 - drivers/dma/idxd/device.c | 2 + drivers/dma/idxd/dma.c | 119 +++++++++++++++++++++++++++++++++++++++++++++ drivers/dma/idxd/idxd.h | 14 +++++ drivers/dma/idxd/init.c | 48 ++++++++++++++++++ drivers/dma/idxd/irq.c | 101 ++++++++++++++++++++++++++++++++++++++ drivers/dma/idxd/submit.c | 51 +++++++++++++++++++ drivers/dma/idxd/sysfs.c | 28 +++++++++++ 9 files changed, 364 insertions(+), 2 deletions(-) create mode 100644 drivers/dma/idxd/dma.c diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 1e24d7b44fdb..002ccfeda22b 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -283,6 +283,7 @@ config INTEL_IDXD tristate "Intel Data Accelerators support" depends on PCI && X86_64 select DMA_ENGINE + select DMA_ENGINE_REQUEST select SBITMAP help Enable support for the Intel(R) data accelerators present diff --git a/drivers/dma/idxd/Makefile b/drivers/dma/idxd/Makefile index 50eca12015e2..a036ba0e77d2 100644 --- a/drivers/dma/idxd/Makefile +++ b/drivers/dma/idxd/Makefile @@ -1,2 +1,2 @@ obj-$(CONFIG_INTEL_IDXD) += idxd.o -idxd-y := init.o irq.o device.o sysfs.o submit.o +idxd-y := init.o irq.o device.o sysfs.o submit.o dma.o diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 74a60a8bef76..49638d3a2151 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -5,7 +5,9 @@ #include <linux/module.h> #include <linux/pci.h> #include <linux/io-64-nonatomic-lo-hi.h> +#include <linux/dmaengine.h> #include <uapi/linux/idxd.h> +#include "../dmaengine.h" #include "idxd.h" #include "registers.h" diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c new file mode 100644 index 000000000000..07fbc98668ae --- /dev/null +++ b/drivers/dma/idxd/dma.c @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2019 Intel Corporation. All rights rsvd. 
*/ +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/device.h> +#include <linux/io-64-nonatomic-lo-hi.h> +#include <linux/dmaengine.h> +#include <uapi/linux/idxd.h> +#include "../dmaengine.h" +#include "registers.h" +#include "idxd.h" + +void idxd_parse_completion_status(u8 status, enum dmaengine_tx_result *res) +{ + u8 code = status & DSA_COMP_STATUS_MASK; + + switch (code) { + case DSA_COMP_SUCCESS: + *res = DMA_TRANS_NOERROR; + break; + case DSA_COMP_HW_ERR1: + *res = DMA_TRANS_READ_FAILED; + break; + default: + *res = DMA_TRANS_ERROR; + break; + } +} + +static int idxd_dma_submit_request(struct dma_chan *chan, + struct dma_request *req) +{ + struct idxd_wq *wq = container_of(chan, struct idxd_wq, dma_chan); + + if (req->cmd == DMA_MEMCPY) + return idxd_submit_memcpy(wq, req); + + return -EINVAL; +} + +static int idxd_dma_alloc_chan_resources(struct dma_chan *chan) +{ + struct idxd_wq *wq = container_of(chan, struct idxd_wq, dma_chan); + struct device *dev = &wq->idxd->pdev->dev; + + idxd_wq_get(wq); + dev_dbg(dev, "%s: client_count: %d\n", __func__, idxd_wq_refcount(wq)); + return 0; +} + +static void idxd_dma_free_chan_resources(struct dma_chan *chan) +{ + struct idxd_wq *wq = container_of(chan, struct idxd_wq, dma_chan); + struct device *dev = &wq->idxd->pdev->dev; + + idxd_wq_put(wq); + dev_dbg(dev, "%s: client_count: %d\n", __func__, idxd_wq_refcount(wq)); +} + +int idxd_register_dma_device(struct idxd_device *idxd) +{ + struct dma_device *dma = &idxd->dma_dev; + + INIT_LIST_HEAD(&dma->channels); + dma->dev = &idxd->pdev->dev; + + if (idxd->hw.opcap.bits[0] & IDXD_OPCAP_MEMMOVE) + dma_cap_set(DMA_MEMCPY, dma->cap_mask); + + if (idxd->hw.opcap.bits[0] & IDXD_OPCAP_NOOP) + dma_cap_set(DMA_INTERRUPT, dma->cap_mask); + + dma->device_submit_request = idxd_dma_submit_request; + dma->device_alloc_chan_resources = idxd_dma_alloc_chan_resources; + dma->device_free_chan_resources = idxd_dma_free_chan_resources; + + return dma_async_request_device_register(&idxd->dma_dev); +} + +void idxd_unregister_dma_device(struct idxd_device *idxd) +{ + dma_async_device_unregister(&idxd->dma_dev); +} + +int idxd_register_dma_channel(struct idxd_wq *wq) +{ + struct idxd_device *idxd = wq->idxd; + struct dma_device *dma = &idxd->dma_dev; + struct dma_chan *chan = &wq->dma_chan; + struct idxd_group *group = wq->group; + int rc; + + memset(&wq->dma_chan, 0, sizeof(struct dma_chan)); + chan->device = dma; + list_add_tail(&chan->device_node, &dma->channels); + chan->max_sgs = wq->batch_size; + chan->depth = wq->size + + idxd->hw.gen_cap.max_descs_per_engine * group->num_engines; + + rc = dma_async_device_channel_register(dma, chan); + if (rc < 0) + return rc; + + rc = dma_chan_alloc_request_resources(chan); + if (rc < 0) { + dma_async_device_channel_unregister(dma, chan); + return rc; + } + + return 0; +} + +void idxd_unregister_dma_channel(struct idxd_wq *wq) +{ + dma_chan_free_request_resources(&wq->dma_chan); + dma_async_device_channel_unregister(&wq->idxd->dma_dev, &wq->dma_chan); +} diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 076456357962..fc1634e689cf 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -4,6 +4,7 @@ #define _IDXD_H_ #include <linux/sbitmap.h> +#include <linux/dmaengine.h> #include <linux/percpu-rwsem.h> #include <linux/wait.h> #include "registers.h" @@ -96,6 +97,7 @@ struct idxd_wq { int batches_size; int batch_size; struct sbitmap sbmap; + struct dma_chan dma_chan; struct 
percpu_rw_semaphore submit_lock; wait_queue_head_t submit_waitq; char name[WQ_NAME_SIZE + 1]; @@ -168,6 +170,8 @@ struct idxd_device { struct msix_entry *msix_entries; int num_wq_irqs; struct idxd_irq_entry *irq_entries; + + struct dma_device dma_dev; }; /* IDXD software descriptor */ @@ -182,6 +186,7 @@ struct idxd_desc { struct list_head list; int id; struct idxd_wq *wq; + struct dma_request *req; }; #define confdev_to_idxd(dev) container_of(dev, struct idxd_device, conf_dev) @@ -253,4 +258,13 @@ int idxd_wq_disable(struct idxd_wq *wq); int idxd_wq_map_portal(struct idxd_wq *wq); void idxd_wq_unmap_portal(struct idxd_wq *wq); +/* submission */ +int idxd_submit_memcpy(struct idxd_wq *wq, struct dma_request *req); + +/* dmaengine */ +int idxd_register_dma_device(struct idxd_device *idxd); +void idxd_unregister_dma_device(struct idxd_device *idxd); +int idxd_register_dma_channel(struct idxd_wq *wq); +void idxd_unregister_dma_channel(struct idxd_wq *wq); +void idxd_parse_completion_status(u8 status, enum dmaengine_tx_result *res); #endif diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 229386464923..b2e887508078 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -15,6 +15,8 @@ #include <linux/device.h> #include <linux/idr.h> #include <uapi/linux/idxd.h> +#include <linux/dmaengine.h> +#include "../dmaengine.h" #include "registers.h" #include "idxd.h" @@ -396,6 +398,50 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; } +static void idxd_flush_pending_llist(struct idxd_irq_entry *ie) +{ + struct idxd_desc *desc, *itr; + struct llist_node *head; + struct dma_request *req; + + head = llist_del_all(&ie->pending_llist); + if (!head) + return; + + llist_for_each_entry_safe(desc, itr, head, llnode) { + req = desc->req; + if (!desc->completion->status) + req->result.result = DMA_TRANS_ABORTED; + else if (desc->completion->status == DSA_COMP_SUCCESS) + req->result.result = DMA_TRANS_NOERROR; + else + req->result.result = DMA_TRANS_ERROR; + + dmaengine_request_complete(req); + idxd_free_desc(desc->wq, desc); + } +} + +static void idxd_flush_work_list(struct idxd_irq_entry *ie) +{ + struct idxd_desc *desc, *iter; + struct dma_request *req; + + list_for_each_entry_safe(desc, iter, &ie->work_list, list) { + req = desc->req; + list_del(&desc->list); + if (!desc->completion->status) + req->result.result = DMA_TRANS_ABORTED; + else if (desc->completion->status == DSA_COMP_SUCCESS) + req->result.result = DMA_TRANS_NOERROR; + else + req->result.result = DMA_TRANS_ERROR; + + dmaengine_request_complete(req); + idxd_free_desc(desc->wq, desc); + } +} + static void idxd_shutdown(struct pci_dev *pdev) { struct idxd_device *idxd = pci_get_drvdata(pdev); @@ -419,6 +465,8 @@ static void idxd_shutdown(struct pci_dev *pdev) synchronize_irq(idxd->msix_entries[i].vector); if (i == 0) continue; + idxd_flush_pending_llist(irq_entry); + idxd_flush_work_list(irq_entry); } } diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index de4b80973c2f..b4adeb2817d1 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -5,7 +5,9 @@ #include <linux/module.h> #include <linux/pci.h> #include <linux/io-64-nonatomic-lo-hi.h> +#include <linux/dmaengine.h> #include <uapi/linux/idxd.h> +#include "../dmaengine.h" #include "idxd.h" #include "registers.h" @@ -146,11 +148,110 @@ irqreturn_t idxd_misc_thread(int vec, void *data) return IRQ_HANDLED; } +static int irq_process_pending_llist(struct idxd_irq_entry *irq_entry, + int *processed) +{ + 
struct idxd_desc *desc, *t; + struct llist_node *head; + int queued = 0; + struct dma_request *req; + + head = llist_del_all(&irq_entry->pending_llist); + if (!head) + return 0; + + llist_for_each_entry_safe(desc, t, head, llnode) { + req = desc->req; + if (desc->completion->status) { + if ((desc->completion->status & DSA_COMP_STATUS_MASK) != + DSA_COMP_SUCCESS) + idxd_parse_completion_status(desc->completion->status, + &req->result.result); + + dmaengine_request_complete(req); + idxd_free_desc(desc->wq, desc); + (*processed)++; + } else { + list_add_tail(&desc->list, &irq_entry->work_list); + queued++; + } + } + + return queued; +} + +static int irq_process_work_list(struct idxd_irq_entry *irq_entry, + int *processed) +{ + struct list_head *node, *next; + int queued = 0; + struct dma_request *req; + + if (list_empty(&irq_entry->work_list)) + return 0; + + list_for_each_safe(node, next, &irq_entry->work_list) { + struct idxd_desc *desc = + container_of(node, struct idxd_desc, list); + + req = desc->req; + if (desc->completion->status) { + list_del(&desc->list); + /* process and callback */ + if ((desc->completion->status & DSA_COMP_STATUS_MASK) != + DSA_COMP_SUCCESS) + idxd_parse_completion_status(desc->completion->status, + &req->result.result); + + dmaengine_request_complete(req); + idxd_free_desc(desc->wq, desc); + (*processed)++; + } else { + queued++; + } + } + + return queued; +} + irqreturn_t idxd_wq_thread(int irq, void *data) { struct idxd_irq_entry *irq_entry = data; + int rc, processed = 0, retry = 0; + + /* + * There are two lists we are processing. The pending_llist is where + * the submitter adds all the submitted descriptors after sending them to + * the workqueue. It's a lockless singly linked list. The work_list + * is a common Linux doubly linked list. We are in a scenario of + * multiple producers and a single consumer. The producers are all + * the kernel submitters of descriptors, and the consumer is the + * kernel irq handler thread for the msix vector when using threaded + * irq. To work with the restrictions of llist to remain lockless, + * we are doing the following steps: + * 1. Iterate through the work_list and process any completed + * descriptor. Delete the completed entries during iteration. + * 2. llist_del_all() from the pending list. + * 3. Iterate through the llist that was deleted from the pending list + * and process the completed entries. + * 4. If the entry is still waiting on hardware, list_add_tail() to + * the work_list. + * 5. Repeat until no more descriptors. + */ + do { + rc = irq_process_work_list(irq_entry, &processed); + if (rc != 0) { + retry++; + continue; + } + + rc = irq_process_pending_llist(irq_entry, &processed); + } while (rc != 0 && retry != 10); idxd_unmask_msix_vector(irq_entry->idxd, irq_entry->id); + if (processed == 0) + return IRQ_NONE; + return IRQ_HANDLED; } diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index 2dcd13f9f654..f7baa1bbb0c7 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -87,7 +87,9 @@ static int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc, * Pending the descriptor to the lockless list for the irq_entry * that we designated the descriptor to. 
*/ - llist_add(&desc->llnode, &idxd->irq_entries[vec].pending_llist); + if (desc->req->flags & DMA_PREP_INTERRUPT) + llist_add(&desc->llnode, + &idxd->irq_entries[vec].pending_llist); return 0; } @@ -125,3 +127,50 @@ static inline void set_completion_address(struct idxd_desc *desc, { *compl_addr = desc->compl_dma; } + +static void op_flag_setup(struct idxd_wq *wq, struct dma_request *req, + u32 *desc_flags) +{ + *desc_flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR; + if (req->flags & DMA_PREP_INTERRUPT) + *desc_flags |= IDXD_OP_FLAG_RCI; + if (req->flags & DMA_PREP_FENCE) + *desc_flags |= IDXD_OP_FLAG_FENCE; +} + +int idxd_submit_memcpy(struct idxd_wq *wq, struct dma_request *req) +{ + u32 desc_flags; + struct idxd_device *idxd = wq->idxd; + struct idxd_desc *desc; + int rc; + bool nonblock; + u64 compl_addr, src, dst; + + if (wq->state != IDXD_WQ_ENABLED) + return -EPERM; + + if (req->bvec.bv_len > idxd->max_xfer_bytes) + return -EINVAL; + + op_flag_setup(wq, req, &desc_flags); + nonblock = !!(req->flags & DMA_SUBMIT_NONBLOCK); + desc = idxd_alloc_desc(wq, nonblock); + if (IS_ERR(desc)) + return PTR_ERR(desc); + + set_completion_address(desc, &compl_addr); + set_desc_addresses(req, &src, &dst); + idxd_prep_desc_common(wq, desc->hw, DSA_OPCODE_MEMMOVE, + src, dst, req->bvec.bv_len, compl_addr, + desc_flags); + desc->req = req; + + rc = idxd_submit_desc(wq, desc, nonblock); + if (rc < 0) { + idxd_free_desc(wq, desc); + return rc; + } + + return 0; +} diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index b6da47b52116..bcbd6020c8ee 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -55,6 +55,14 @@ static inline bool is_idxd_wq_dev(struct device *dev) return dev ? dev->type == &idxd_wq_device_type : false; } +static inline bool is_idxd_wq_dmaengine(struct idxd_wq *wq) +{ + if (wq->type == IDXD_WQT_KERNEL && + strcmp(wq->name, "dmaengine") == 0) + return true; + return false; +} + static int idxd_config_bus_match(struct device *dev, struct device_driver *drv) { @@ -122,6 +130,11 @@ static int idxd_config_bus_probe(struct device *dev) spin_unlock_irqrestore(&idxd->dev_lock, flags); dev_info(dev, "Device %s enabled\n", dev_name(dev)); + rc = idxd_register_dma_device(idxd); + if (rc < 0) { + dev_dbg(dev, "Failed to register dmaengine device\n"); + return rc; + } return 0; } else if (is_idxd_wq_dev(dev)) { struct idxd_wq *wq = confdev_to_wq(dev); @@ -194,6 +208,16 @@ static int idxd_config_bus_probe(struct device *dev) wq->client_count = 0; dev_info(dev, "wq %s enabled\n", dev_name(&wq->conf_dev)); + + if (is_idxd_wq_dmaengine(wq)) { + rc = idxd_register_dma_channel(wq); + if (rc < 0) { + dev_dbg(dev, "DMA channel register failed\n"); + mutex_unlock(&wq->wq_lock); + return rc; + } + } + mutex_unlock(&wq->wq_lock); return 0; } @@ -215,6 +239,9 @@ static void disable_wq(struct idxd_wq *wq) return; } + if (is_idxd_wq_dmaengine(wq)) + idxd_unregister_dma_channel(wq); + if (idxd_wq_refcount(wq)) dev_warn(dev, "Clients has claim on wq %d: %d\n", wq->id, idxd_wq_refcount(wq)); @@ -264,6 +291,7 @@ static int idxd_config_bus_remove(struct device *dev) device_release_driver(&wq->conf_dev); } + idxd_unregister_dma_device(idxd); spin_lock_irqsave(&idxd->dev_lock, flags); rc = idxd_device_disable(idxd); spin_unlock_irqrestore(&idxd->dev_lock, flags);
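
For readers less familiar with the llist pattern that idxd_wq_thread() relies on, the sketch below is a minimal, self-contained illustration of the same two-list scheme: many submitters publish descriptors on a lockless llist, and the single irq thread drains that llist, completes finished entries, and parks the rest on an ordinary list until hardware writes their completion status. All demo_* names are illustrative only and are not part of this patch or of the idxd driver.

#include <linux/llist.h>
#include <linux/list.h>
#include <linux/types.h>

struct demo_desc {
	struct llist_node llnode;	/* entry on the lockless pending list */
	struct list_head list;		/* entry on the irq thread's work list */
	u8 status;			/* written by hardware on completion */
};

/* Producers (many, no lock): publish a descriptor after ringing hardware. */
static void demo_submit(struct llist_head *pending, struct demo_desc *desc)
{
	llist_add(&desc->llnode, pending);
}

/* Single consumer (threaded irq): drain both lists, park what is not done. */
static int demo_process(struct llist_head *pending, struct list_head *work)
{
	struct demo_desc *desc, *next;
	struct llist_node *head;
	int processed = 0;

	/* 1. Walk the ordinary work list; completed entries can be deleted. */
	list_for_each_entry_safe(desc, next, work, list) {
		if (!desc->status)
			continue;
		list_del(&desc->list);
		/* complete the request back to the client here */
		processed++;
	}

	/* 2. Atomically take everything the producers have published so far. */
	head = llist_del_all(pending);

	/* 3. Complete finished entries; park the rest on the work list. */
	llist_for_each_entry_safe(desc, next, head, llnode) {
		if (desc->status) {
			/* complete the request back to the client here */
			processed++;
		} else {
			list_add_tail(&desc->list, work);
		}
	}

	return processed;
}

idxd_wq_thread() above repeats this processing until both lists drain or a retry limit is hit, and returns IRQ_NONE when nothing was processed.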