From: Tomasz Jankowski <tomasz1.jankowski@xxxxxxxxx>

Although the patch adds the GNA_WAIT ioctl, its main purpose is to
provide the FIFO work-queue logic, which offloads each score operation
in sequence to the GNA accelerator. When a score operation is done, the
process(es) WAITing for it to finish are woken up.

Signed-off-by: Tomasz Jankowski <tomasz1.jankowski@xxxxxxxxx>
Tested-by: Mikolaj Grzybowski <mikolajx.grzybowski@xxxxxxxxx>
Co-developed-by: Anisha Dattatraya Kulkarni <anisha.dattatraya.kulkarni@xxxxxxxxx>
Signed-off-by: Anisha Dattatraya Kulkarni <anisha.dattatraya.kulkarni@xxxxxxxxx>
Co-developed-by: Jianxun Zhang <jianxun.zhang@xxxxxxxxxxxxxxx>
Signed-off-by: Jianxun Zhang <jianxun.zhang@xxxxxxxxxxxxxxx>
Co-developed-by: Maciej Kwapulinski <maciej.kwapulinski@xxxxxxxxxxxxxxx>
Signed-off-by: Maciej Kwapulinski <maciej.kwapulinski@xxxxxxxxxxxxxxx>
---
 drivers/gpu/drm/gna/Kbuild        |   2 +-
 drivers/gpu/drm/gna/gna_device.c  |  26 +++-
 drivers/gpu/drm/gna/gna_device.h  |  18 ++-
 drivers/gpu/drm/gna/gna_gem.h     |   3 +
 drivers/gpu/drm/gna/gna_hw.c      | 110 +++++++++++++++
 drivers/gpu/drm/gna/gna_hw.h     |  30 ++++
 drivers/gpu/drm/gna/gna_ioctl.c   |  93 +++++++++++++
 drivers/gpu/drm/gna/gna_mem.c     | 128 +++++++++++++++++
 drivers/gpu/drm/gna/gna_mem.h     |   6 +
 drivers/gpu/drm/gna/gna_pci.c     |  34 ++++-
 drivers/gpu/drm/gna/gna_request.c |  88 ++++++++++++
 drivers/gpu/drm/gna/gna_request.h |  17 +++
 drivers/gpu/drm/gna/gna_score.c   | 222 ++++++++++++++++++++++++++++++
 drivers/gpu/drm/gna/gna_score.h   |  11 ++
 include/uapi/drm/gna_drm.h        |  49 +++++++
 15 files changed, 833 insertions(+), 4 deletions(-)
 create mode 100644 drivers/gpu/drm/gna/gna_hw.c
 create mode 100644 drivers/gpu/drm/gna/gna_score.c
 create mode 100644 drivers/gpu/drm/gna/gna_score.h

diff --git a/drivers/gpu/drm/gna/Kbuild b/drivers/gpu/drm/gna/Kbuild
index 15c5e4fe7e4d..d799c9530f79 100644
--- a/drivers/gpu/drm/gna/Kbuild
+++ b/drivers/gpu/drm/gna/Kbuild
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
 
-gna-y := gna_device.o gna_ioctl.o gna_mem.o gna_pci.o gna_request.o
+gna-y := gna_device.o gna_hw.o gna_ioctl.o gna_mem.o gna_pci.o gna_request.o gna_score.o
 
 obj-$(CONFIG_DRM_GNA) += gna.o
diff --git a/drivers/gpu/drm/gna/gna_device.c b/drivers/gpu/drm/gna/gna_device.c
index 0f8ec5a9dde4..2ef5be16dc9a 100644
--- a/drivers/gpu/drm/gna/gna_device.c
+++ b/drivers/gpu/drm/gna/gna_device.c
@@ -9,6 +9,7 @@
 #include <linux/device.h>
 #include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/workqueue.h>
@@ -28,6 +29,7 @@ static const struct drm_ioctl_desc gna_drm_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(GNA_GEM_NEW, gna_gem_new_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(GNA_GEM_FREE, gna_gem_free_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(GNA_COMPUTE, gna_score_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(GNA_WAIT, gna_wait_ioctl, DRM_RENDER_ALLOW),
 };
 
@@ -47,6 +49,16 @@ static int gna_drm_dev_init(struct drm_device *dev)
 	return drmm_add_action_or_reset(dev, gna_drm_dev_fini, NULL);
 }
 
+static irqreturn_t gna_interrupt(int irq, void *priv)
+{
+	struct gna_device *gna_priv;
+
+	gna_priv = (struct gna_device *)priv;
+	gna_priv->dev_busy = false;
+	wake_up(&gna_priv->dev_busy_waitq);
+	return IRQ_HANDLED;
+}
+
 static void gna_workqueue_fini(struct drm_device *drm, void *data)
 {
 	struct workqueue_struct *request_wq = data;
@@ -96,7 +108,7 @@ static const struct drm_driver gna_drm_driver = {
 	.patchlevel = DRIVER_PATCHLEVEL,
 };
 
-int gna_probe(struct device *parent, struct gna_dev_info *dev_info, void __iomem *iobase)
+int
gna_probe(struct device *parent, struct gna_dev_info *dev_info, void __iomem *iobase, int irq) { struct gna_device *gna_priv; struct drm_device *drm_dev; @@ -130,15 +142,27 @@ int gna_probe(struct device *parent, struct gna_dev_info *dev_info, void __iomem dev_dbg(parent, "maximum memory size %llu num pd %d\n", gna_priv->info.max_hw_mem, gna_priv->info.num_pagetables); + dev_dbg(parent, "desc rsvd size %d mmu vamax size %d\n", + gna_priv->info.desc_info.rsvd_size, + gna_priv->info.desc_info.mmu_info.vamax_size); + + mutex_init(&gna_priv->mmu_lock); + atomic_set(&gna_priv->request_count, 0); mutex_init(&gna_priv->reqlist_lock); INIT_LIST_HEAD(&gna_priv->request_list); + init_waitqueue_head(&gna_priv->dev_busy_waitq); + err = gna_workqueue_init(gna_priv); if (err) return err; + err = devm_request_irq(parent, irq, gna_interrupt, + IRQF_SHARED, gna_name(gna_priv), gna_priv); + if (err) + return err; dev_set_drvdata(parent, drm_dev); diff --git a/drivers/gpu/drm/gna/gna_device.h b/drivers/gpu/drm/gna/gna_device.h index b628ffa9970f..ffdc3d1d1f87 100644 --- a/drivers/gpu/drm/gna/gna_device.h +++ b/drivers/gpu/drm/gna/gna_device.h @@ -35,12 +35,20 @@ struct gna_device { int recovery_timeout_jiffies; + /* hardware status set by interrupt handler */ + u32 hw_status; + /* device related resources */ void __iomem *iobase; struct gna_dev_info info; struct gna_hw_info hw_info; struct gna_mmu_object mmu; + struct mutex mmu_lock; + + /* if true, then gna device is processing */ + bool dev_busy; + struct wait_queue_head dev_busy_waitq; struct list_head request_list; /* protects request_list */ @@ -52,7 +60,7 @@ struct gna_device { atomic_t enqueued_requests; }; -int gna_probe(struct device *parent, struct gna_dev_info *dev_info, void __iomem *iobase); +int gna_probe(struct device *parent, struct gna_dev_info *dev_info, void __iomem *iobase, int irq); int gna_getparam(struct gna_device *gna_priv, union gna_parameter *param); int gna_getparam_ioctl(struct drm_device *dev, void *data, @@ -67,11 +75,19 @@ int gna_gem_free_ioctl(struct drm_device *dev, void *data, int gna_score_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +int gna_wait_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); + static inline u32 gna_reg_read(struct gna_device *gna_priv, u32 reg) { return readl(gna_priv->iobase + reg); } +static inline void gna_reg_write(struct gna_device *gna_priv, u32 reg, u32 val) +{ + writel(val, gna_priv->iobase + reg); +} + static inline const char *gna_name(struct gna_device *gna_priv) { return gna_priv->drm.unique; diff --git a/drivers/gpu/drm/gna/gna_gem.h b/drivers/gpu/drm/gna/gna_gem.h index 18ffa8245380..92372fc93718 100644 --- a/drivers/gpu/drm/gna/gna_gem.h +++ b/drivers/gpu/drm/gna/gna_gem.h @@ -6,6 +6,7 @@ #include <drm/drm_gem_shmem_helper.h> +#include <linux/wait.h> #include <linux/workqueue.h> struct gna_gem_object { @@ -14,6 +15,8 @@ struct gna_gem_object { uint32_t handle; struct work_struct work; + + struct wait_queue_head waitq; }; #endif /* __GNA_GEM_H__ */ diff --git a/drivers/gpu/drm/gna/gna_hw.c b/drivers/gpu/drm/gna/gna_hw.c new file mode 100644 index 000000000000..dff7c6b3edea --- /dev/null +++ b/drivers/gpu/drm/gna/gna_hw.c @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright(c) 2017-2022 Intel Corporation + +#include <linux/bitfield.h> +#include <linux/iopoll.h> + +#include <uapi/drm/gna_drm.h> + +#include "gna_device.h" +#include "gna_hw.h" + +int gna_parse_hw_status(struct gna_device *gna_priv, u32 hw_status) +{ + if (hw_status 
& GNA_ERROR) {
+		dev_dbg(gna_dev(gna_priv), "GNA completed with errors: %#x\n", hw_status);
+		return -EIO;
+	}
+
+	if (hw_status & GNA_STS_SCORE_COMPLETED) {
+		dev_dbg(gna_dev(gna_priv), "GNA completed successfully: %#x\n", hw_status);
+		return 0;
+	}
+
+	dev_dbg(gna_dev(gna_priv), "GNA not completed, status: %#x\n", hw_status);
+	return -ENODATA;
+}
+
+void gna_print_error_status(struct gna_device *gna_priv, u32 hw_status)
+{
+	if (hw_status & GNA_STS_PARAM_OOR)
+		dev_dbg(gna_dev(gna_priv), "GNA error: Param Out of Range Error\n");
+
+	if (hw_status & GNA_STS_VA_OOR)
+		dev_dbg(gna_dev(gna_priv), "GNA error: VA Out of Range Error\n");
+
+	if (hw_status & GNA_STS_PCI_MMU_ERR)
+		dev_dbg(gna_dev(gna_priv), "GNA error: PCI MMU Error\n");
+
+	if (hw_status & GNA_STS_PCI_DMA_ERR)
+		dev_dbg(gna_dev(gna_priv), "GNA error: PCI DMA Error\n");
+
+	if (hw_status & GNA_STS_PCI_UNEXCOMPL_ERR)
+		dev_dbg(gna_dev(gna_priv), "GNA error: PCI Unexpected Completion Error\n");
+
+	if (hw_status & GNA_STS_SATURATE)
+		dev_dbg(gna_dev(gna_priv), "GNA error: Saturation Reached!\n");
+}
+
+bool gna_hw_perf_enabled(struct gna_device *gna_priv)
+{
+	u32 ctrl = gna_reg_read(gna_priv, GNA_MMIO_CTRL);
+
+	return !!FIELD_GET(GNA_CTRL_COMP_STATS_EN, ctrl);
+}
+
+void gna_start_scoring(struct gna_device *gna_priv,
+		       struct gna_compute_cfg *compute_cfg)
+{
+	u32 ctrl = gna_reg_read(gna_priv, GNA_MMIO_CTRL);
+
+	ctrl |= GNA_CTRL_START_ACCEL | GNA_CTRL_COMP_INT_EN | GNA_CTRL_ERR_INT_EN;
+
+	ctrl &= ~GNA_CTRL_COMP_STATS_EN;
+	ctrl |= FIELD_PREP(GNA_CTRL_COMP_STATS_EN,
+			   compute_cfg->hw_perf_encoding & FIELD_MAX(GNA_CTRL_COMP_STATS_EN));
+
+	ctrl &= ~GNA_CTRL_ACTIVE_LIST_EN;
+	ctrl |= FIELD_PREP(GNA_CTRL_ACTIVE_LIST_EN,
+			   compute_cfg->active_list_on & FIELD_MAX(GNA_CTRL_ACTIVE_LIST_EN));
+
+	ctrl &= ~GNA_CTRL_OP_MODE;
+	ctrl |= FIELD_PREP(GNA_CTRL_OP_MODE,
+			   compute_cfg->gna_mode & FIELD_MAX(GNA_CTRL_OP_MODE));
+
+	gna_reg_write(gna_priv, GNA_MMIO_CTRL, ctrl);
+}
+
+static void gna_clear_saturation(struct gna_device *gna_priv)
+{
+	u32 val;
+
+	val = gna_reg_read(gna_priv, GNA_MMIO_STS);
+	if (val & GNA_STS_SATURATE) {
+		dev_dbg(gna_dev(gna_priv), "status (saturation): %#x\n", val);
+
+		val = val & GNA_STS_SATURATE;
+		gna_reg_write(gna_priv, GNA_MMIO_STS, val);
+	}
+}
+
+int gna_abort_hw(struct gna_device *gna_priv)
+{
+	u32 val;
+
+	/* The saturation bit in the GNA status register needs
+	 * to be explicitly cleared.
+ */ + gna_clear_saturation(gna_priv); + + val = gna_reg_read(gna_priv, GNA_MMIO_STS); + dev_dbg(gna_dev(gna_priv), "status (before abort): %#x\n", val); + + val = gna_reg_read(gna_priv, GNA_MMIO_CTRL); + val |= GNA_CTRL_ABORT_CLR_ACCEL; + gna_reg_write(gna_priv, GNA_MMIO_CTRL, val); + + return readl_poll_timeout(gna_priv->iobase + GNA_MMIO_STS, val, + !(val & 0x1), + 0, 1000); +} diff --git a/drivers/gpu/drm/gna/gna_hw.h b/drivers/gpu/drm/gna/gna_hw.h index 514166a87c4d..97338e1be3b6 100644 --- a/drivers/gpu/drm/gna/gna_hw.h +++ b/drivers/gpu/drm/gna/gna_hw.h @@ -4,8 +4,12 @@ #ifndef __GNA_HW_H__ #define __GNA_HW_H__ +#include <linux/bits.h> #include <linux/mm_types.h> +struct gna_compute_cfg; +struct gna_device; + #define GNA_FEATURES \ .max_hw_mem = 256 * 1024 * 1024, \ .num_pagetables = 64, \ @@ -42,11 +46,30 @@ #define GNA_DEV_HWID_MTL 0x7E4C /* GNA MMIO registers */ +#define GNA_MMIO_STS 0x80 +#define GNA_MMIO_CTRL 0x84 +#define GNA_MMIO_PTC 0x8C +#define GNA_MMIO_PSC 0x90 +#define GNA_MMIO_D0I3C 0xA8 +#define GNA_MMIO_DESBASE 0xB0 #define GNA_MMIO_IBUFFS 0xB4 +#define GNA_PT_ENTRY_SIZE 4 +/* there are up to 1024 32-bit pointers in one page in Page Table (L1) */ +#define GNA_PT_LENGTH (PAGE_SIZE / GNA_PT_ENTRY_SIZE) + #define GNA_PGDIRN_LEN 64 +#define GNA_PGDIR_ENTRIES 1024 /* 32-bit page addresses */ #define GNA_PGDIR_INVALID 1 +#define GNA_CTRL_START_ACCEL BIT(0) +#define GNA_CTRL_ACTIVE_LIST_EN BIT(1) +#define GNA_CTRL_ABORT_CLR_ACCEL BIT(2) +#define GNA_CTRL_OP_MODE GENMASK(6, 5) +#define GNA_CTRL_COMP_INT_EN BIT(8) +#define GNA_CTRL_ERR_INT_EN BIT(10) +#define GNA_CTRL_COMP_STATS_EN GENMASK(15, 12) + struct gna_mmu_info { u32 vamax_size; u32 rsvd_size; @@ -74,4 +97,11 @@ struct gna_dev_info { struct gna_desc_info desc_info; }; +int gna_abort_hw(struct gna_device *gna_priv); +bool gna_hw_perf_enabled(struct gna_device *gna_priv); +int gna_parse_hw_status(struct gna_device *gna_priv, u32 hw_status); +void gna_print_error_status(struct gna_device *gna_priv, u32 hw_status); +void gna_start_scoring(struct gna_device *gna_priv, + struct gna_compute_cfg *compute_cfg); + #endif // __GNA_HW_H__ diff --git a/drivers/gpu/drm/gna/gna_ioctl.c b/drivers/gpu/drm/gna/gna_ioctl.c index ab3a2b789589..f3c805e946ee 100644 --- a/drivers/gpu/drm/gna/gna_ioctl.c +++ b/drivers/gpu/drm/gna/gna_ioctl.c @@ -5,6 +5,11 @@ #include <drm/drm_gem_shmem_helper.h> #include <drm/drm_file.h> +#include <linux/jiffies.h> +#include <linux/kref.h> +#include <linux/list.h> +#include <linux/mutex.h> +#include <linux/wait.h> #include <linux/workqueue.h> #include <uapi/drm/gna_drm.h> @@ -33,6 +38,86 @@ int gna_score_ioctl(struct drm_device *dev, void *data, return 0; } +static struct gna_request *gna_find_request_by_id(u64 req_id, struct gna_device *gna_priv) +{ + struct gna_request *req, *found_req; + struct list_head *reqs_list; + + mutex_lock(&gna_priv->reqlist_lock); + + reqs_list = &gna_priv->request_list; + found_req = NULL; + if (!list_empty(reqs_list)) { + list_for_each_entry(req, reqs_list, node) { + if (req_id == req->request_id) { + found_req = req; + kref_get(&found_req->refcount); + break; + } + } + } + + mutex_unlock(&gna_priv->reqlist_lock); + + return found_req; +} + +int gna_wait_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct gna_device *gna_priv = to_gna_device(dev); + union gna_wait *wait_data = data; + struct gna_request *score_request; + u64 request_id; + u32 timeout; + int ret = 0; + + request_id = wait_data->in.request_id; + timeout = wait_data->in.timeout; + + 
score_request = gna_find_request_by_id(request_id, gna_priv);
+
+	if (!score_request) {
+		dev_dbg(gna_dev(gna_priv), "could not find request, id: %llu\n", request_id);
+		return -EINVAL;
+	}
+
+	if (score_request->drm_f != file) {
+		dev_dbg(gna_dev(gna_priv), "illegal file_priv: %p != %p\n", score_request->drm_f, file);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = wait_event_interruptible_timeout(score_request->waitq, score_request->state == DONE,
+					       msecs_to_jiffies(timeout));
+	if (ret == 0) {
+		dev_dbg(gna_dev(gna_priv), "request timed out, id: %llu\n", request_id);
+		ret = -EBUSY;
+		goto out;
+	}
+	if (ret == -ERESTARTSYS)
+		goto out;
+
+	wait_data->out.hw_perf = score_request->hw_perf;
+	wait_data->out.drv_perf = score_request->drv_perf;
+	wait_data->out.hw_status = score_request->hw_status;
+
+	ret = score_request->status;
+
+	dev_dbg(gna_dev(gna_priv), "request status: %d, hw status: %#x\n",
+		score_request->status, score_request->hw_status);
+
+	cancel_work_sync(&score_request->work);
+	mutex_lock(&gna_priv->reqlist_lock);
+	if (!list_empty(&score_request->node)) {
+		list_del_init(&score_request->node);
+		kref_put(&score_request->refcount, gna_request_release); /* drop gna_priv->request_list's reference */
+	}
+	mutex_unlock(&gna_priv->reqlist_lock);
+
+out:
+	kref_put(&score_request->refcount, gna_request_release);
+	return ret;
+}
+
 int gna_gem_free_ioctl(struct drm_device *dev, void *data,
 		       struct drm_file *file)
 {
@@ -49,10 +134,16 @@ int gna_gem_free_ioctl(struct drm_device *dev, void *data,
 	gnagemo = to_gna_gem_obj(to_drm_gem_shmem_obj(drmgemo));
 
 	queue_work(gna_priv->request_wq, &gnagemo->work);
+	if (wait_event_interruptible(gnagemo->waitq, true)) {
+		ret = -ERESTARTSYS;
+		goto out;
+	}
+
 	cancel_work_sync(&gnagemo->work);
 
 	ret = drm_gem_handle_delete(file, args->handle);
 
+out:
 	drm_gem_object_put(drmgemo);
 	return ret;
 }
@@ -111,5 +202,7 @@ int gna_gem_new_ioctl(struct drm_device *dev, void *data,
 	gnagemo->handle = args->out.handle;
 
 	INIT_WORK(&gnagemo->work, gna_gem_obj_release_work);
+	init_waitqueue_head(&gnagemo->waitq);
+
 	return 0;
 }
diff --git a/drivers/gpu/drm/gna/gna_mem.c b/drivers/gpu/drm/gna/gna_mem.c
index 54c5a4d68d06..8719c3195fce 100644
--- a/drivers/gpu/drm/gna/gna_mem.c
+++ b/drivers/gpu/drm/gna/gna_mem.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 // Copyright(c) 2017-2022 Intel Corporation
 
+#include <drm/drm_gem.h>
 #include <drm/drm_gem_shmem_helper.h>
 #include <drm/drm_managed.h>
 
@@ -12,6 +13,10 @@
 #include <linux/math.h>
 #include <linux/mm.h>
 #include <linux/mutex.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/wait.h>
 #include <linux/workqueue.h>
 
 #include "gna_device.h"
@@ -85,6 +90,127 @@ int gna_mmu_init(struct gna_device *gna_priv)
 	return 0;
 }
 
+static struct scatterlist *gna_iterate_sgl(u64 sg_elems, struct scatterlist *sgl, dma_addr_t *sg_page,
+					   int *sg_page_len, int *sg_pages)
+{
+	while (sg_elems-- > 0) {
+		(*sg_page) += PAGE_SIZE;
+		(*sg_pages)++;
+		if (*sg_pages == *sg_page_len) {
+			sgl = sg_next(sgl);
+			if (!sgl)
+				break;
+
+			*sg_page = sg_dma_address(sgl);
+			*sg_page_len =
+				round_up(sg_dma_len(sgl), PAGE_SIZE)
+					>> PAGE_SHIFT;
+			*sg_pages = 0;
+		}
+	}
+
+	return sgl;
+}
+
+void gna_mmu_add(struct gna_device *gna_priv, struct drm_gem_shmem_object *drmshmemo)
+{
+	struct gna_mmu_object *mmu;
+	struct scatterlist *sgl;
+	dma_addr_t sg_page;
+	int sg_page_len;
+	u32 *pagetable;
+	u32 mmu_page;
+	int sg_pages;
+	int i;
+	int j;
+
+	mmu = &gna_priv->mmu;
+	mutex_lock(&gna_priv->mmu_lock);
+
+	j =
mmu->filled_pages; + sgl = drmshmemo->sgt->sgl; + + if (!sgl) { + dev_warn(gna_dev(gna_priv), "empty scatter list in memory object\n"); + goto warn_empty_sgl; + } + sg_page = sg_dma_address(sgl); + sg_page_len = round_up(sg_dma_len(sgl), PAGE_SIZE) >> PAGE_SHIFT; + sg_pages = 0; + + for (i = mmu->filled_pts; i < mmu->num_pagetables; i++) { + if (!sgl) + break; + + pagetable = mmu->pagetables[i]; + + for (j = mmu->filled_pages; j < GNA_PT_LENGTH; j++) { + mmu_page = sg_page >> PAGE_SHIFT; + pagetable[j] = mmu_page; + + mmu->filled_pages++; + + sgl = gna_iterate_sgl(1, sgl, &sg_page, &sg_page_len, + &sg_pages); + if (!sgl) + break; + } + + if (j == GNA_PT_LENGTH) { + mmu->filled_pages = 0; + mmu->filled_pts++; + } + } + + mmu->hwdesc->mmu.vamaxaddr = + (mmu->filled_pts * PAGE_SIZE * GNA_PGDIR_ENTRIES) + + (mmu->filled_pages * PAGE_SIZE) - 1; + dev_dbg(gna_dev(gna_priv), "vamaxaddr: %u\n", mmu->hwdesc->mmu.vamaxaddr); + +warn_empty_sgl: + mutex_unlock(&gna_priv->mmu_lock); +} + +void gna_mmu_clear(struct gna_device *gna_priv) +{ + struct gna_mmu_object *mmu; + int i; + + mmu = &gna_priv->mmu; + mutex_lock(&gna_priv->mmu_lock); + + for (i = 0; i < mmu->filled_pts; i++) + memset(mmu->pagetables[i], 0, PAGE_SIZE); + + if (mmu->filled_pages > 0) + memset(mmu->pagetables[mmu->filled_pts], 0, mmu->filled_pages * GNA_PT_ENTRY_SIZE); + + mmu->filled_pts = 0; + mmu->filled_pages = 0; + mmu->hwdesc->mmu.vamaxaddr = 0; + + mutex_unlock(&gna_priv->mmu_lock); +} + +bool gna_gem_object_put_pages_sgt(struct gna_gem_object *gnagemo) +{ + struct drm_gem_shmem_object *shmem = &gnagemo->base; + struct drm_gem_object *drmgemo = &shmem->base; + + if (!mutex_trylock(&shmem->pages_lock)) + return false; + dma_unmap_sgtable(drmgemo->dev->dev, shmem->sgt, DMA_BIDIRECTIONAL, 0); + sg_free_table(shmem->sgt); + kfree(shmem->sgt); + shmem->sgt = NULL; + mutex_unlock(&shmem->pages_lock); + + drm_gem_shmem_put_pages(shmem); + + return true; +} + static void gna_delete_score_requests(u32 handle, struct gna_device *gna_priv) { struct gna_request *req, *temp_req; @@ -118,4 +244,6 @@ void gna_gem_obj_release_work(struct work_struct *work) gnagemo = container_of(work, struct gna_gem_object, work); gna_delete_score_requests(gnagemo->handle, to_gna_device(gnagemo->base.base.dev)); + + wake_up_interruptible(&gnagemo->waitq); } diff --git a/drivers/gpu/drm/gna/gna_mem.h b/drivers/gpu/drm/gna/gna_mem.h index 9d8251900231..92193f9d608d 100644 --- a/drivers/gpu/drm/gna/gna_mem.h +++ b/drivers/gpu/drm/gna/gna_mem.h @@ -47,6 +47,12 @@ struct gna_mmu_object { int gna_mmu_init(struct gna_device *gna_priv); +void gna_mmu_add(struct gna_device *gna_priv, struct drm_gem_shmem_object *drmshmemo); + +void gna_mmu_clear(struct gna_device *gna_priv); + +bool gna_gem_object_put_pages_sgt(struct gna_gem_object *gna_obj); + void gna_gem_obj_release_work(struct work_struct *work); #endif // __GNA_MEM_H__ diff --git a/drivers/gpu/drm/gna/gna_pci.c b/drivers/gpu/drm/gna/gna_pci.c index 9a6c82362e32..034554b417a8 100644 --- a/drivers/gpu/drm/gna/gna_pci.c +++ b/drivers/gpu/drm/gna/gna_pci.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only // Copyright(c) 2017-2022 Intel Corporation +#include <linux/device.h> #include <linux/module.h> #include <linux/pci.h> @@ -75,10 +76,33 @@ static const struct pci_device_id gna_pci_ids[] = { { } }; +static void gna_irq_vectors_fini(void *data) +{ + struct pci_dev *pcidev = data; + + pci_free_irq_vectors(pcidev); +} + +static int gna_irq_vectors_init(struct pci_dev *pcidev) +{ + int ret; + + ret = 
pci_alloc_irq_vectors(pcidev, 1, 1, PCI_IRQ_ALL_TYPES);
+	if (ret < 0)
+		return ret;
+
+	ret = devm_add_action(&pcidev->dev, gna_irq_vectors_fini, pcidev);
+	if (ret)
+		gna_irq_vectors_fini(pcidev);
+
+	return ret;
+}
+
 int gna_pci_probe(struct pci_dev *pcidev, const struct pci_device_id *pci_id)
 {
 	struct gna_dev_info *dev_info;
 	void __iomem *iobase;
+	int irq;
 	int err;
 
 	err = pcim_enable_device(pcidev);
@@ -93,9 +117,17 @@ int gna_pci_probe(struct pci_dev *pcidev, const struct pci_device_id *pci_id)
 
 	pci_set_master(pcidev);
 
+	err = gna_irq_vectors_init(pcidev);
+	if (err < 0)
+		return err;
+
+	irq = pci_irq_vector(pcidev, 0);
+	if (irq < 0)
+		return irq;
+
 	dev_info = (struct gna_dev_info *)pci_id->driver_data;
 
-	err = gna_probe(&pcidev->dev, dev_info, iobase);
+	err = gna_probe(&pcidev->dev, dev_info, iobase, irq);
 	if (err)
 		return err;
 
diff --git a/drivers/gpu/drm/gna/gna_request.c b/drivers/gpu/drm/gna/gna_request.c
index 7151d7c2b353..9efaea61ac68 100644
--- a/drivers/gpu/drm/gna/gna_request.c
+++ b/drivers/gpu/drm/gna/gna_request.c
@@ -12,10 +12,14 @@
 #include <linux/math.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
+#include <linux/timekeeping.h>
 #include <linux/uaccess.h>
 
 #include "gna_device.h"
+#include "gna_hw.h"
+#include "gna_mem.h"
 #include "gna_request.h"
+#include "gna_score.h"
 
 int gna_validate_score_config(struct gna_compute_cfg *compute_cfg,
 			      struct gna_device *gna_priv)
@@ -45,6 +49,38 @@ int gna_validate_score_config(struct gna_compute_cfg *compute_cfg,
 	return 0;
 }
 
+static void gna_request_update_status(struct gna_request *score_request)
+{
+	struct gna_device *gna_priv = to_gna_device(score_request->drm_f->minor->dev);
+	/* gna_priv->hw_status must be updated first */
+	u32 hw_status = gna_priv->hw_status;
+	u32 stall_cycles;
+	u32 total_cycles;
+
+	/* Technically, the time stamp can be a bit later than
+	 * when the hw actually completed scoring. Here we just
+	 * do our best in deferred work, unless we want to
+	 * tax the isr for a more accurate record.
+	 */
+	score_request->drv_perf.hw_completed = ktime_get_ns();
+
+	score_request->hw_status = hw_status;
+
+	score_request->status = gna_parse_hw_status(gna_priv, hw_status);
+
+	if (gna_hw_perf_enabled(gna_priv)) {
+		if (hw_status & GNA_STS_STATISTICS_VALID) {
+			total_cycles = gna_reg_read(gna_priv, GNA_MMIO_PTC);
+			stall_cycles = gna_reg_read(gna_priv, GNA_MMIO_PSC);
+			score_request->hw_perf.total = total_cycles;
+			score_request->hw_perf.stall = stall_cycles;
+		} else {
+			dev_warn(gna_dev(gna_priv), "GNA statistics missing\n");
+		}
+	}
+	if (unlikely(hw_status & GNA_ERROR))
+		gna_print_error_status(gna_priv, hw_status);
+}
+
 static void gna_request_make_zombie(struct gna_request *score_request)
 {
 	int i;
@@ -60,15 +96,64 @@ static void gna_request_make_zombie(struct gna_request *score_request)
 
 static void gna_request_process(struct work_struct *work)
 {
+	struct gna_buffer_with_object *buffer;
 	struct gna_request *score_request;
 	struct gna_device *gna_priv;
+	unsigned long hw_timeout;
+	int ret;
+	u64 i;
 
 	score_request = container_of(work, struct gna_request, work);
 	gna_priv = to_gna_device(score_request->drm_f->minor->dev);
 
+	score_request->state = ACTIVE;
+
+	score_request->drv_perf.pre_processing = ktime_get_ns();
+
+	/* Set busy flag before kicking off HW. The isr will clear it and wake us up. There is
+	 * no difference if the isr is missed in a timeout situation of the last request. We just
+	 * always set it busy and let the wait_event_timeout check the result.
+ * wq: X -> true + * isr: X -> false + */ + gna_priv->dev_busy = true; + + ret = gna_score(score_request); + if (ret) { + score_request->status = ret; + goto tail; + } + + score_request->drv_perf.processing = ktime_get_ns(); + + hw_timeout = gna_priv->recovery_timeout_jiffies; + + hw_timeout = wait_event_timeout(gna_priv->dev_busy_waitq, + !gna_priv->dev_busy, hw_timeout); + + if (!hw_timeout) + dev_warn(gna_dev(gna_priv), "hardware timeout occurred\n"); + + gna_priv->hw_status = gna_reg_read(gna_priv, GNA_MMIO_STS); + + gna_request_update_status(score_request); + + ret = gna_abort_hw(gna_priv); + if (ret < 0 && score_request->status == 0) + score_request->status = ret; // -ETIMEDOUT + + gna_mmu_clear(gna_priv); + + for (i = 0, buffer = score_request->buffer_list; i < score_request->buffer_count; i++, buffer++) + gna_gem_object_put_pages_sgt(buffer->gem); + +tail: + score_request->drv_perf.completion = ktime_get_ns(); + score_request->state = DONE; gna_request_make_zombie(score_request); atomic_dec(&gna_priv->enqueued_requests); + wake_up_interruptible_all(&score_request->waitq); } static struct gna_request *gna_request_create(struct drm_file *file, @@ -92,6 +177,8 @@ static struct gna_request *gna_request_create(struct drm_file *file, score_request->request_id = atomic_inc_return(&gna_priv->request_count); score_request->compute_cfg = *compute_cfg; score_request->drm_f = file; + score_request->state = NEW; + init_waitqueue_head(&score_request->waitq); INIT_WORK(&score_request->work, gna_request_process); INIT_LIST_HEAD(&score_request->node); @@ -334,5 +421,6 @@ void gna_request_release(struct kref *ref) struct gna_request *score_request = container_of(ref, struct gna_request, refcount); gna_request_make_zombie(score_request); + wake_up_interruptible_all(&score_request->waitq); kfree(score_request); } diff --git a/drivers/gpu/drm/gna/gna_request.h b/drivers/gpu/drm/gna/gna_request.h index 432c30863e7e..d056e70fb369 100644 --- a/drivers/gpu/drm/gna/gna_request.h +++ b/drivers/gpu/drm/gna/gna_request.h @@ -6,6 +6,7 @@ #include <linux/kref.h> #include <linux/types.h> +#include <linux/wait.h> #include <linux/workqueue.h> #include <uapi/drm/gna_drm.h> @@ -14,6 +15,12 @@ struct gna_device; struct gna_gem_object; struct drm_file; +enum gna_request_state { + NEW, + ACTIVE, + DONE, +}; + struct gna_buffer_with_object { struct gna_buffer gna; struct gna_gem_object *gem; @@ -26,6 +33,15 @@ struct gna_request { struct drm_file *drm_f; + u32 hw_status; + + enum gna_request_state state; + + int status; + + struct gna_hw_perf hw_perf; + struct gna_drv_perf drv_perf; + struct list_head node; struct gna_compute_cfg compute_cfg; @@ -34,6 +50,7 @@ struct gna_request { u64 buffer_count; struct work_struct work; + struct wait_queue_head waitq; }; int gna_validate_score_config(struct gna_compute_cfg *compute_cfg, diff --git a/drivers/gpu/drm/gna/gna_score.c b/drivers/gpu/drm/gna/gna_score.c new file mode 100644 index 000000000000..529270657a83 --- /dev/null +++ b/drivers/gpu/drm/gna/gna_score.c @@ -0,0 +1,222 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright(c) 2017-2022 Intel Corporation + +#include <drm/drm_gem.h> +#include <drm/drm_gem_shmem_helper.h> + +#include <linux/dma-buf.h> +#include <linux/kernel.h> +#include <linux/math.h> +#include <linux/mm.h> +#include <linux/scatterlist.h> +#include <linux/string.h> +#include <linux/types.h> + +#include <uapi/drm/gna_drm.h> + +#include "../drm_internal.h" + +#include "gna_device.h" +#include "gna_gem.h" +#include "gna_hw.h" +#include "gna_mem.h" 
+#include "gna_request.h" +#include "gna_score.h" + +static int gna_do_patch_memory(struct gna_device *gna_priv, + struct gna_memory_patch *patch, void *vaddr) +{ + size_t size; + void *dest; + u64 value; + + value = patch->value; + size = patch->size; + dest = (u8 *)vaddr + patch->offset; + + switch (size) { + case 0: + return -EFAULT; + case sizeof(u8): + *((u8 *)dest) = (u8)value; + break; + case sizeof(u16): + *((u16 *)dest) = (u16)value; + break; + case sizeof(u32): + *((u32 *)dest) = (u32)value; + break; + case sizeof(u64): + *((u64 *)dest) = (u64)value; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int gna_patch_memory(struct gna_device *gna_priv, struct gna_buffer_with_object *buffer) +{ + struct drm_gem_shmem_object *drmshmemo = &buffer->gem->base; + struct gna_gem_object *gnagemo = buffer->gem; + struct gna_buffer *gnab = &buffer->gna; + struct gna_memory_patch *patch; + struct iosys_map vmap; + struct sg_table *sgt; + int ret = 0; + u32 i; + + dev_dbg(gna_dev(gna_priv), "handle: %u, patch_count, %llu\n", + gnab->handle, gnab->patch_count); + + sgt = drm_gem_shmem_get_pages_sgt(drmshmemo); + + if (IS_ERR(sgt)) { + ret = PTR_ERR(sgt); + goto err; + } + + if (gnab->patch_count) { + ret = drm_gem_vmap(&drmshmemo->base, &vmap); + + if (ret) + goto err_pages_sgt; + + patch = (struct gna_memory_patch *)(uintptr_t)gnab->patches_ptr; + for (i = 0; i < gnab->patch_count; i++, patch++) { + ret = gna_do_patch_memory(gna_priv, patch, vmap.vaddr); + if (ret) + break; + } + + kvfree((void *)(uintptr_t)gnab->patches_ptr); + gnab->patches_ptr = 0; + drm_gem_vunmap(&drmshmemo->base, &vmap); + if (ret) // ret from gna_do_patch_memory + goto err_pages_sgt; + } + + gna_mmu_add(gna_priv, drmshmemo); + + return 0; + +err_pages_sgt: + gna_gem_object_put_pages_sgt(gnagemo); +err: + return ret; +} + +static struct gna_buffer_with_object *gna_find_buffer(struct gna_buffer_with_object *buffer_list, + u32 buffer_count, u32 mmu_offset, u32 *memory_offset) +{ + struct gna_buffer_with_object *buffer; + u32 memory_size; + u32 offset; + u32 i; + + offset = 0; + for (i = 0; i < buffer_count; i++) { + buffer = buffer_list + i; + memory_size = round_up(buffer->gna.size, PAGE_SIZE); + if (mmu_offset < offset + memory_size) { + *memory_offset = offset; + return buffer; + } + offset += memory_size; + } + + return NULL; +} + +static int gna_copy_gmm_config(struct gna_device *gna_priv, + struct gna_buffer_with_object *buffer_list, + u32 buffer_count, u32 mmu_offset) +{ + struct gna_buffer_with_object *buffer; + struct gna_hw_descriptor *hwdesc; + struct drm_gem_object *drmgemo; + struct gna_mmu_object *mmu; + struct iosys_map vmap; + u32 memory_offset; + u8 *gmm_desc; + int ret = 0; + + mmu = &gna_priv->mmu; + hwdesc = mmu->hwdesc; + + buffer = gna_find_buffer(buffer_list, buffer_count, mmu_offset, &memory_offset); + if (!buffer) + return -EINVAL; + + drmgemo = &buffer->gem->base.base; + + ret = drm_gem_vmap(drmgemo, &vmap); + + if (!ret) { + ret = -ENOMEM; + return ret; + } + + gmm_desc = (u8 *)vmap.vaddr + (mmu_offset - memory_offset); + memcpy(&hwdesc->xnn_config, gmm_desc, sizeof(struct gna_xnn_descriptor)); + drm_gem_vunmap(drmgemo, &vmap); + + return 0; +} + +int gna_score(struct gna_request *score_request) +{ + struct gna_buffer_with_object *buffer; + struct gna_xnn_descriptor *xnn_config; + struct gna_compute_cfg *compute_cfg; + struct gna_device *gna_priv; + struct gna_mmu_object *mmu; + u64 buffer_count; + u32 desc_base; + int ret; + u64 i; + + ret = 0; + + gna_priv = 
to_gna_device(score_request->drm_f->minor->dev); + + mmu = &gna_priv->mmu; + xnn_config = &mmu->hwdesc->xnn_config; + compute_cfg = &score_request->compute_cfg; + + buffer_count = score_request->buffer_count; + + for (i = 0, buffer = score_request->buffer_list; i < buffer_count; i++, buffer++) { + ret = gna_patch_memory(gna_priv, buffer); + if (ret) + goto err; + } + + switch (compute_cfg->gna_mode) { + case GNA_MODE_XNN: + dev_dbg(gna_dev(gna_priv), "xNN mode; labase: %d, lacount: %d\n", + compute_cfg->layer_base, compute_cfg->layer_count); + xnn_config->labase = compute_cfg->layer_base; + xnn_config->lacount = compute_cfg->layer_count; + break; + case GNA_MODE_GMM: + dev_dbg(gna_dev(gna_priv), "GMM mode; offset: %d\n", compute_cfg->layer_base); + ret = gna_copy_gmm_config(gna_priv, score_request->buffer_list, + buffer_count, compute_cfg->layer_base); + if (ret) + goto err; + break; + default: + ret = -EINVAL; + goto err; + } + + desc_base = (u32)(mmu->hwdesc_dma >> PAGE_SHIFT); + gna_reg_write(gna_priv, GNA_MMIO_DESBASE, desc_base); + + gna_start_scoring(gna_priv, compute_cfg); + +err: + return ret; +} diff --git a/drivers/gpu/drm/gna/gna_score.h b/drivers/gpu/drm/gna/gna_score.h new file mode 100644 index 000000000000..5b154d3623e0 --- /dev/null +++ b/drivers/gpu/drm/gna/gna_score.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2017-2022 Intel Corporation */ + +#ifndef __GNA_SCORE_H__ +#define __GNA_SCORE_H__ + +struct gna_request; + +int gna_score(struct gna_request *score_request); + +#endif // __GNA_SCORE_H__ diff --git a/include/uapi/drm/gna_drm.h b/include/uapi/drm/gna_drm.h index aaae9a46dec5..677343d88987 100644 --- a/include/uapi/drm/gna_drm.h +++ b/include/uapi/drm/gna_drm.h @@ -20,6 +20,22 @@ #define GNA_PARAM_INPUT_BUFFER_S 3 #define GNA_PARAM_DDI_VERSION 4 +#define GNA_STS_SCORE_COMPLETED _BITUL(0) +#define GNA_STS_STATISTICS_VALID _BITUL(3) +#define GNA_STS_PCI_MMU_ERR _BITUL(4) +#define GNA_STS_PCI_DMA_ERR _BITUL(5) +#define GNA_STS_PCI_UNEXCOMPL_ERR _BITUL(6) +#define GNA_STS_VA_OOR _BITUL(7) +#define GNA_STS_PARAM_OOR _BITUL(8) +#define GNA_STS_SATURATE _BITUL(17) + +#define GNA_ERROR \ + (GNA_STS_PCI_DMA_ERR |\ + GNA_STS_PCI_MMU_ERR |\ + GNA_STS_PCI_UNEXCOMPL_ERR |\ + GNA_STS_PARAM_OOR |\ + GNA_STS_VA_OOR) + #define GNA_DEV_TYPE_0_9 0x09 #define GNA_DEV_TYPE_1_0 0x10 #define GNA_DEV_TYPE_2_0 0x20 @@ -50,6 +66,22 @@ struct gna_buffer { __u64 patches_ptr; }; +/* + * Driver performance timestamps in nanoseconds. + * Values regard system boot time, but do not count during suspend. 
+ */ +struct gna_drv_perf { + __u64 pre_processing; /* driver starts pre-processing */ + __u64 processing; /* hw starts processing */ + __u64 hw_completed; /* hw finishes processing */ + __u64 completion; /* driver finishes post-processing */ +}; + +struct gna_hw_perf { + __u64 total; + __u64 stall; +}; + struct gna_compute_cfg { __u32 layer_base; __u32 layer_count; @@ -88,6 +120,21 @@ union gna_compute { } out; }; +union gna_wait { + struct { + __u64 request_id; + __u32 timeout; + __u32 pad; + } in; + + struct { + __u32 hw_status; + __u32 pad; + struct gna_drv_perf drv_perf; + struct gna_hw_perf hw_perf; + } out; +}; + struct gna_mem_id { __u32 handle; __u32 pad; @@ -111,10 +158,12 @@ struct gna_gem_free { #define DRM_GNA_GEM_NEW 0x01 #define DRM_GNA_GEM_FREE 0x02 #define DRM_GNA_COMPUTE 0x03 +#define DRM_GNA_WAIT 0x04 #define DRM_IOCTL_GNA_GET_PARAMETER DRM_IOWR(DRM_COMMAND_BASE + DRM_GNA_GET_PARAMETER, union gna_parameter) #define DRM_IOCTL_GNA_GEM_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_GNA_GEM_NEW, union gna_gem_new) #define DRM_IOCTL_GNA_GEM_FREE DRM_IOWR(DRM_COMMAND_BASE + DRM_GNA_GEM_FREE, struct gna_gem_free) #define DRM_IOCTL_GNA_COMPUTE DRM_IOWR(DRM_COMMAND_BASE + DRM_GNA_COMPUTE, union gna_compute) +#define DRM_IOCTL_GNA_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_GNA_WAIT, union gna_wait) #endif /* _GNA_DRM_H_ */ -- 2.25.1
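For reviewers, below is a minimal userspace sketch of the submit/wait flow this
patch enables. It is illustrative only and not part of the patch: the render
node path and the assumption that the COMPUTE ioctl returns the new request's
id in compute->out.request_id come from earlier patches in this series; only
union gna_wait and DRM_IOCTL_GNA_WAIT are defined here.

/* Illustrative only: submit one score request, then WAIT for it.
 * Assumes `compute` was fully populated for DRM_IOCTL_GNA_COMPUTE by
 * the caller (fd is an open render node, e.g. /dev/dri/renderD128)
 * and that its out.request_id field carries the id assigned by the
 * driver, as set up earlier in this series.
 */
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>

#include <drm/gna_drm.h>

static int gna_submit_and_wait(int fd, union gna_compute *compute)
{
	union gna_wait wait;
	int ret;

	ret = ioctl(fd, DRM_IOCTL_GNA_COMPUTE, compute);
	if (ret)
		return ret;	/* the request was never enqueued */

	memset(&wait, 0, sizeof(wait));
	wait.in.request_id = compute->out.request_id;
	wait.in.timeout = 60 * 1000;	/* milliseconds, see msecs_to_jiffies() */

	/* Sleeps until gna_request_process() marks the request DONE;
	 * fails with errno EBUSY when the timeout elapses first.
	 */
	ret = ioctl(fd, DRM_IOCTL_GNA_WAIT, &wait);
	if (ret) {
		fprintf(stderr, "GNA_WAIT failed, hw status: %#x\n",
			wait.out.hw_status);
		return ret;
	}

	printf("scored in %llu ns, hw cycles: %llu total, %llu stalled\n",
	       (unsigned long long)(wait.out.drv_perf.completion -
				    wait.out.drv_perf.pre_processing),
	       (unsigned long long)wait.out.hw_perf.total,
	       (unsigned long long)wait.out.hw_perf.stall);
	return 0;
}

Note that wait.out.hw_perf is only populated when performance counting was
enabled in gna_compute_cfg (see gna_hw_perf_enabled()), and a request that
timed out remains on the device's request_list, so the WAIT can simply be
retried with the same request id.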