On Thu, Dec 8, 2022 at 1:08 PM Jacek Lawrynowicz <jacek.lawrynowicz@xxxxxxxxxxxxxxx> wrote: > > VPU Memory Management Unit is based on ARM MMU-600. > It allows the creation of multiple virtual address spaces for > the device and the mapping of non-contiguous host memory (there is no dedicated > memory on the VPU). > > An address space is implemented as a struct ivpu_mmu_context; it has an ID, > a drm_mm allocator for VPU addresses and a struct ivpu_mmu_pgtable that > holds the actual 3-level, 4KB page table. > The context with ID 0 (the global context) is created upon driver initialization > and is mainly used for mapping memory required to execute > the firmware. > Contexts with non-zero IDs are user contexts allocated each time > the device is open()-ed and they map command buffers and other > workload-related memory. > Workloads executing in a given context have access only > to the memory mapped in that context. > > This patch has two main files: > - ivpu_mmu_context.c handles MMU page tables and memory mapping > - ivpu_mmu.c implements a driver that programs the MMU device > > Co-developed-by: Karol Wachowski <karol.wachowski@xxxxxxxxxxxxxxx> > Signed-off-by: Karol Wachowski <karol.wachowski@xxxxxxxxxxxxxxx> > Co-developed-by: Krystian Pradzynski <krystian.pradzynski@xxxxxxxxxxxxxxx> > Signed-off-by: Krystian Pradzynski <krystian.pradzynski@xxxxxxxxxxxxxxx> > Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@xxxxxxxxxxxxxxx> > --- > drivers/accel/ivpu/Makefile | 4 +- > drivers/accel/ivpu/ivpu_drv.c | 83 ++- > drivers/accel/ivpu/ivpu_drv.h | 6 + > drivers/accel/ivpu/ivpu_hw_mtl.c | 10 + > drivers/accel/ivpu/ivpu_mmu.c | 875 ++++++++++++++++++++++++++ > drivers/accel/ivpu/ivpu_mmu.h | 50 ++ > drivers/accel/ivpu/ivpu_mmu_context.c | 385 ++++++++++++ > drivers/accel/ivpu/ivpu_mmu_context.h | 49 ++ > include/uapi/drm/ivpu_drm.h | 4 + > 9 files changed, 1463 insertions(+), 3 deletions(-) > create mode 100644 drivers/accel/ivpu/ivpu_mmu.c > create mode 100644 drivers/accel/ivpu/ivpu_mmu.h > create mode 100644 drivers/accel/ivpu/ivpu_mmu_context.c > create mode 100644 drivers/accel/ivpu/ivpu_mmu_context.h > > diff --git a/drivers/accel/ivpu/Makefile b/drivers/accel/ivpu/Makefile > index 28330c04e52f..37b8bf1d3247 100644 > --- a/drivers/accel/ivpu/Makefile > +++ b/drivers/accel/ivpu/Makefile > @@ -3,6 +3,8 @@ > > intel_vpu-y := \ > ivpu_drv.o \ > - ivpu_hw_mtl.o > + ivpu_hw_mtl.o \ > + ivpu_mmu.o \ > + ivpu_mmu_context.o > > obj-$(CONFIG_DRM_ACCEL_IVPU) += intel_vpu.o > \ No newline at end of file > diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c > index 8fbccb8d888b..a22d41ca5a4b 100644 > --- a/drivers/accel/ivpu/ivpu_drv.c > +++ b/drivers/accel/ivpu/ivpu_drv.c > @@ -15,6 +15,8 @@ > > #include "ivpu_drv.h" > #include "ivpu_hw.h" > +#include "ivpu_mmu.h" > +#include "ivpu_mmu_context.h" > > #ifndef DRIVER_VERSION_STR > #define DRIVER_VERSION_STR __stringify(DRM_IVPU_DRIVER_MAJOR) "." 
\ > @@ -37,23 +39,38 @@ MODULE_PARM_DESC(pll_max_ratio, "Maximum PLL ratio used to set VPU frequency"); > > struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv) > { > + struct ivpu_device *vdev = file_priv->vdev; > + > kref_get(&file_priv->ref); > + > + ivpu_dbg(vdev, KREF, "file_priv get: ctx %u refcount %u\n", > + file_priv->ctx.id, kref_read(&file_priv->ref)); > + > return file_priv; > } > > static void file_priv_release(struct kref *ref) > { > struct ivpu_file_priv *file_priv = container_of(ref, struct ivpu_file_priv, ref); > + struct ivpu_device *vdev = file_priv->vdev; > > + ivpu_dbg(vdev, FILE, "file_priv release: ctx %u\n", file_priv->ctx.id); > + > + ivpu_mmu_user_context_fini(vdev, &file_priv->ctx); > + WARN_ON(xa_erase_irq(&vdev->context_xa, file_priv->ctx.id) != file_priv); > kfree(file_priv); > } > > void ivpu_file_priv_put(struct ivpu_file_priv **link) > { > struct ivpu_file_priv *file_priv = *link; > + struct ivpu_device *vdev = file_priv->vdev; > > WARN_ON(!file_priv); > > + ivpu_dbg(vdev, KREF, "file_priv put: ctx %u refcount %u\n", > + file_priv->ctx.id, kref_read(&file_priv->ref)); > + > *link = NULL; > kref_put(&file_priv->ref, file_priv_release); > } > @@ -88,6 +105,9 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f > case DRM_IVPU_PARAM_CONTEXT_PRIORITY: > args->value = file_priv->priority; > break; > + case DRM_IVPU_PARAM_CONTEXT_ID: > + args->value = file_priv->ctx.id; Why is this needed ? Why does the user need to know its context ID ? > + break; > default: > ret = -EINVAL; > break; > @@ -120,22 +140,59 @@ static int ivpu_open(struct drm_device *dev, struct drm_file *file) > { > struct ivpu_device *vdev = to_ivpu_device(dev); > struct ivpu_file_priv *file_priv; > + u32 ctx_id; > + void *old; > + int ret; > + > + ret = xa_alloc_irq(&vdev->context_xa, &ctx_id, NULL, vdev->context_xa_limit, GFP_KERNEL); > + if (ret) { > + ivpu_err(vdev, "Failed to allocate context id: %d\n", ret); > + return ret; > + } > > file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); > - if (!file_priv) > - return -ENOMEM; > + if (!file_priv) { > + ret = -ENOMEM; > + goto err_xa_erase; > + } > > file_priv->vdev = vdev; > file_priv->priority = DRM_IVPU_CONTEXT_PRIORITY_NORMAL; > kref_init(&file_priv->ref); > > + ret = ivpu_mmu_user_context_init(vdev, &file_priv->ctx, ctx_id); > + if (ret) > + goto err_free_file_priv; > + > + old = xa_store_irq(&vdev->context_xa, ctx_id, file_priv, GFP_KERNEL); > + if (xa_is_err(old)) { > + ret = xa_err(old); > + ivpu_err(vdev, "Failed to store context %u: %d\n", ctx_id, ret); > + goto err_ctx_fini; > + } > + > + ivpu_dbg(vdev, FILE, "file_priv create: ctx %u process %s pid %d\n", > + ctx_id, current->comm, task_pid_nr(current)); > + > file->driver_priv = file_priv; > return 0; > + > +err_ctx_fini: > + ivpu_mmu_user_context_fini(vdev, &file_priv->ctx); > +err_free_file_priv: > + kfree(file_priv); > +err_xa_erase: > + xa_erase_irq(&vdev->context_xa, ctx_id); > + return ret; > } > > static void ivpu_postclose(struct drm_device *dev, struct drm_file *file) > { > struct ivpu_file_priv *file_priv = file->driver_priv; > + struct ivpu_device *vdev = to_ivpu_device(dev); > + > + ivpu_dbg(vdev, FILE, "file_priv close: ctx %u process %s pid %d\n", > + file_priv->ctx.id, current->comm, task_pid_nr(current)); > > ivpu_file_priv_put(&file_priv); > } > @@ -150,6 +207,7 @@ int ivpu_shutdown(struct ivpu_device *vdev) > int ret; > > ivpu_hw_irq_disable(vdev); > + ivpu_mmu_disable(vdev); > > ret = ivpu_hw_power_down(vdev); 
> if (ret) > @@ -257,6 +315,10 @@ static int ivpu_dev_init(struct ivpu_device *vdev) > if (!vdev->hw) > return -ENOMEM; > > + vdev->mmu = drmm_kzalloc(&vdev->drm, sizeof(*vdev->mmu), GFP_KERNEL); > + if (!vdev->mmu) > + return -ENOMEM; > + > vdev->hw->ops = &ivpu_hw_mtl_ops; > vdev->platform = IVPU_PLATFORM_INVALID; > vdev->context_xa_limit.min = IVPU_GLOBAL_CONTEXT_MMU_SSID + 1; > @@ -289,8 +351,24 @@ static int ivpu_dev_init(struct ivpu_device *vdev) > goto err_xa_destroy; > } > > + ret = ivpu_mmu_global_context_init(vdev); > + if (ret) { > + ivpu_err(vdev, "Failed to initialize global MMU context: %d\n", ret); > + goto err_power_down; > + } > + > + ret = ivpu_mmu_init(vdev); > + if (ret) { > + ivpu_err(vdev, "Failed to initialize MMU device: %d\n", ret); > + goto err_mmu_gctx_fini; > + } > + > return 0; > > +err_mmu_gctx_fini: > + ivpu_mmu_global_context_fini(vdev); > +err_power_down: > + ivpu_hw_power_down(vdev); > err_xa_destroy: > xa_destroy(&vdev->context_xa); > return ret; > @@ -299,6 +377,7 @@ static int ivpu_dev_init(struct ivpu_device *vdev) > static void ivpu_dev_fini(struct ivpu_device *vdev) > { > ivpu_shutdown(vdev); > + ivpu_mmu_global_context_fini(vdev); > > drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->context_xa)); > xa_destroy(&vdev->context_xa); > diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h > index 4f859e7ac09e..6e8b88068fc9 100644 > --- a/drivers/accel/ivpu/ivpu_drv.h > +++ b/drivers/accel/ivpu/ivpu_drv.h > @@ -15,6 +15,8 @@ > #include <linux/xarray.h> > #include <uapi/drm/ivpu_drm.h> > > +#include "ivpu_mmu_context.h" > + > #define DRIVER_NAME "intel_vpu" > #define DRIVER_DESC "Driver for Intel Versatile Processing Unit (VPU)" > #define DRIVER_DATE "20221208" > @@ -71,6 +73,7 @@ struct ivpu_wa_table { > }; > > struct ivpu_hw_info; > +struct ivpu_mmu_info; > > struct ivpu_device { > struct drm_device drm; > @@ -81,7 +84,9 @@ struct ivpu_device { > > struct ivpu_wa_table wa; > struct ivpu_hw_info *hw; > + struct ivpu_mmu_info *mmu; > > + struct ivpu_mmu_context gctx; > struct xarray context_xa; > struct xa_limit context_xa_limit; > > @@ -100,6 +105,7 @@ struct ivpu_device { > struct ivpu_file_priv { > struct kref ref; > struct ivpu_device *vdev; > + struct ivpu_mmu_context ctx; > u32 priority; > }; > > diff --git a/drivers/accel/ivpu/ivpu_hw_mtl.c b/drivers/accel/ivpu/ivpu_hw_mtl.c > index c84bacd4d0f5..39350203452d 100644 > --- a/drivers/accel/ivpu/ivpu_hw_mtl.c > +++ b/drivers/accel/ivpu/ivpu_hw_mtl.c > @@ -7,6 +7,7 @@ > #include "ivpu_hw_mtl_reg.h" > #include "ivpu_hw_reg_io.h" > #include "ivpu_hw.h" > +#include "ivpu_mmu.h" > > #define TILE_FUSE_ENABLE_BOTH 0x0 > #define TILE_FUSE_ENABLE_UPPER 0x1 > @@ -930,6 +931,15 @@ static u32 ivpu_hw_mtl_irqv_handler(struct ivpu_device *vdev, int irq) > > REGV_WR32(MTL_VPU_HOST_SS_ICB_CLEAR_0, status); > > + if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT, status)) > + ivpu_mmu_irq_evtq_handler(vdev); > + > + if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT, status)) > + ivpu_dbg(vdev, IRQ, "MMU sync complete\n"); > + > + if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, MMU_IRQ_2_INT, status)) > + ivpu_mmu_irq_gerr_handler(vdev); > + > if (REG_TEST_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT, status)) > ivpu_hw_mtl_irq_wdt_mss_handler(vdev); > > diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c > new file mode 100644 > index 000000000000..2dd9d2287055 > --- /dev/null > +++ b/drivers/accel/ivpu/ivpu_mmu.c > @@ -0,0 +1,875 @@ > +// 
SPDX-License-Identifier: GPL-2.0-only > +/* > + * Copyright (C) 2020-2022 Intel Corporation > + */ > + > +#include <linux/circ_buf.h> > +#include <linux/highmem.h> > + > +#include "ivpu_drv.h" > +#include "ivpu_hw_mtl_reg.h" > +#include "ivpu_hw_reg_io.h" > +#include "ivpu_mmu.h" > +#include "ivpu_mmu_context.h" > + > +#define IVPU_MMU_IDR0_REF 0x080f3e0f > +#define IVPU_MMU_IDR0_REF_SIMICS 0x080f3e1f > +#define IVPU_MMU_IDR1_REF 0x0e739d18 > +#define IVPU_MMU_IDR3_REF 0x0000003c > +#define IVPU_MMU_IDR5_REF 0x00040070 > +#define IVPU_MMU_IDR5_REF_SIMICS 0x00000075 > +#define IVPU_MMU_IDR5_REF_FPGA 0x00800075 > + > +#define IVPU_MMU_CDTAB_ENT_SIZE 64 > +#define IVPU_MMU_CDTAB_ENT_COUNT_LOG2 8 /* 256 entries */ > +#define IVPU_MMU_CDTAB_ENT_COUNT ((u32)1 << IVPU_MMU_CDTAB_ENT_COUNT_LOG2) > + > +#define IVPU_MMU_STREAM_ID0 0 > +#define IVPU_MMU_STREAM_ID3 3 > + > +#define IVPU_MMU_STRTAB_ENT_SIZE 64 > +#define IVPU_MMU_STRTAB_ENT_COUNT 4 > +#define IVPU_MMU_STRTAB_CFG_LOG2SIZE 2 > +#define IVPU_MMU_STRTAB_CFG IVPU_MMU_STRTAB_CFG_LOG2SIZE > + > +#define IVPU_MMU_Q_COUNT_LOG2 4 /* 16 entries */ > +#define IVPU_MMU_Q_COUNT ((u32)1 << IVPU_MMU_Q_COUNT_LOG2) > +#define IVPU_MMU_Q_WRAP_BIT (IVPU_MMU_Q_COUNT << 1) > +#define IVPU_MMU_Q_WRAP_MASK (IVPU_MMU_Q_WRAP_BIT - 1) > +#define IVPU_MMU_Q_IDX_MASK (IVPU_MMU_Q_COUNT - 1) > +#define IVPU_MMU_Q_IDX(val) ((val) & IVPU_MMU_Q_IDX_MASK) > + > +#define IVPU_MMU_CMDQ_CMD_SIZE 16 > +#define IVPU_MMU_CMDQ_SIZE (IVPU_MMU_Q_COUNT * IVPU_MMU_CMDQ_CMD_SIZE) > + > +#define IVPU_MMU_EVTQ_CMD_SIZE 32 > +#define IVPU_MMU_EVTQ_SIZE (IVPU_MMU_Q_COUNT * IVPU_MMU_EVTQ_CMD_SIZE) > + > +#define IVPU_MMU_CMD_OPCODE GENMASK(7, 0) > + > +#define IVPU_MMU_CMD_SYNC_0_CS GENMASK(13, 12) > +#define IVPU_MMU_CMD_SYNC_0_MSH GENMASK(23, 22) > +#define IVPU_MMU_CMD_SYNC_0_MSI_ATTR GENMASK(27, 24) > +#define IVPU_MMU_CMD_SYNC_0_MSI_DATA GENMASK(63, 32) > + > +#define IVPU_MMU_CMD_CFGI_0_SSEC BIT(10) > +#define IVPU_MMU_CMD_CFGI_0_SSV BIT(11) > +#define IVPU_MMU_CMD_CFGI_0_SSID GENMASK(31, 12) > +#define IVPU_MMU_CMD_CFGI_0_SID GENMASK(63, 32) > +#define IVPU_MMU_CMD_CFGI_1_RANGE GENMASK(4, 0) > + > +#define IVPU_MMU_CMD_TLBI_0_ASID GENMASK(63, 48) > +#define IVPU_MMU_CMD_TLBI_0_VMID GENMASK(47, 32) > + > +#define CMD_PREFETCH_CFG 0x1 > +#define CMD_CFGI_STE 0x3 > +#define CMD_CFGI_ALL 0x4 > +#define CMD_CFGI_CD 0x5 > +#define CMD_CFGI_CD_ALL 0x6 > +#define CMD_TLBI_NH_ASID 0x11 > +#define CMD_TLBI_EL2_ALL 0x20 > +#define CMD_TLBI_NSNH_ALL 0x30 > +#define CMD_SYNC 0x46 > + > +#define IVPU_MMU_EVT_F_UUT 0x01 > +#define IVPU_MMU_EVT_C_BAD_STREAMID 0x02 > +#define IVPU_MMU_EVT_F_STE_FETCH 0x03 > +#define IVPU_MMU_EVT_C_BAD_STE 0x04 > +#define IVPU_MMU_EVT_F_BAD_ATS_TREQ 0x05 > +#define IVPU_MMU_EVT_F_STREAM_DISABLED 0x06 > +#define IVPU_MMU_EVT_F_TRANSL_FORBIDDEN 0x07 > +#define IVPU_MMU_EVT_C_BAD_SUBSTREAMID 0x08 > +#define IVPU_MMU_EVT_F_CD_FETCH 0x09 > +#define IVPU_MMU_EVT_C_BAD_CD 0x0a > +#define IVPU_MMU_EVT_F_WALK_EABT 0x0b > +#define IVPU_MMU_EVT_F_TRANSLATION 0x10 > +#define IVPU_MMU_EVT_F_ADDR_SIZE 0x11 > +#define IVPU_MMU_EVT_F_ACCESS 0x12 > +#define IVPU_MMU_EVT_F_PERMISSION 0x13 > +#define IVPU_MMU_EVT_F_TLB_CONFLICT 0x20 > +#define IVPU_MMU_EVT_F_CFG_CONFLICT 0x21 > +#define IVPU_MMU_EVT_E_PAGE_REQUEST 0x24 > +#define IVPU_MMU_EVT_F_VMS_FETCH 0x25 > + > +#define IVPU_MMU_EVTS_MAX 8 > + > +#define IVPU_MMU_EVT_OP_MASK GENMASK_ULL(7, 0) > +#define IVPU_MMU_EVT_SSID_MASK GENMASK_ULL(31, 12) > + > +#define 
IVPU_MMU_Q_BASE_RWA BIT(62) > +#define IVPU_MMU_Q_BASE_ADDR_MASK GENMASK_ULL(51, 5) > +#define IVPU_MMU_STRTAB_BASE_RA BIT(62) > +#define IVPU_MMU_STRTAB_BASE_ADDR_MASK GENMASK_ULL(51, 6) > + > +#define IVPU_MMU_IRQ_EVTQ_EN BIT(2) > +#define IVPU_MMU_IRQ_GERROR_EN BIT(0) > + > +#define IVPU_MMU_CR0_ATSCHK BIT(4) > +#define IVPU_MMU_CR0_CMDQEN BIT(3) > +#define IVPU_MMU_CR0_EVTQEN BIT(2) > +#define IVPU_MMU_CR0_PRIQEN BIT(1) > +#define IVPU_MMU_CR0_SMMUEN BIT(0) > + > +#define IVPU_MMU_CR1_TABLE_SH GENMASK(11, 10) > +#define IVPU_MMU_CR1_TABLE_OC GENMASK(9, 8) > +#define IVPU_MMU_CR1_TABLE_IC GENMASK(7, 6) > +#define IVPU_MMU_CR1_QUEUE_SH GENMASK(5, 4) > +#define IVPU_MMU_CR1_QUEUE_OC GENMASK(3, 2) > +#define IVPU_MMU_CR1_QUEUE_IC GENMASK(1, 0) > +#define IVPU_MMU_CACHE_NC 0 > +#define IVPU_MMU_CACHE_WB 1 > +#define IVPU_MMU_CACHE_WT 2 > +#define IVPU_MMU_SH_NSH 0 > +#define IVPU_MMU_SH_OSH 2 > +#define IVPU_MMU_SH_ISH 3 > + > +#define IVPU_MMU_CMDQ_OP GENMASK_ULL(7, 0) > + > +#define IVPU_MMU_CD_0_TCR_T0SZ GENMASK_ULL(5, 0) > +#define IVPU_MMU_CD_0_TCR_TG0 GENMASK_ULL(7, 6) > +#define IVPU_MMU_CD_0_TCR_IRGN0 GENMASK_ULL(9, 8) > +#define IVPU_MMU_CD_0_TCR_ORGN0 GENMASK_ULL(11, 10) > +#define IVPU_MMU_CD_0_TCR_SH0 GENMASK_ULL(13, 12) > +#define IVPU_MMU_CD_0_TCR_EPD0 BIT_ULL(14) > +#define IVPU_MMU_CD_0_TCR_EPD1 BIT_ULL(30) > +#define IVPU_MMU_CD_0_ENDI BIT(15) > +#define IVPU_MMU_CD_0_V BIT(31) > +#define IVPU_MMU_CD_0_TCR_IPS GENMASK_ULL(34, 32) > +#define IVPU_MMU_CD_0_TCR_TBI0 BIT_ULL(38) > +#define IVPU_MMU_CD_0_AA64 BIT(41) > +#define IVPU_MMU_CD_0_S BIT(44) > +#define IVPU_MMU_CD_0_R BIT(45) > +#define IVPU_MMU_CD_0_A BIT(46) > +#define IVPU_MMU_CD_0_ASET BIT(47) > +#define IVPU_MMU_CD_0_ASID GENMASK_ULL(63, 48) > + > +#define IVPU_MMU_CD_1_TTB0_MASK GENMASK_ULL(51, 4) > + > +#define IVPU_MMU_STE_0_S1CDMAX GENMASK_ULL(63, 59) > +#define IVPU_MMU_STE_0_S1FMT GENMASK_ULL(5, 4) > +#define IVPU_MMU_STE_0_S1FMT_LINEAR 0 > +#define IVPU_MMU_STE_DWORDS 8 > +#define IVPU_MMU_STE_0_CFG_S1_TRANS 5 > +#define IVPU_MMU_STE_0_CFG GENMASK_ULL(3, 1) > +#define IVPU_MMU_STE_0_S1CTXPTR_MASK GENMASK_ULL(51, 6) > +#define IVPU_MMU_STE_0_V BIT(0) > + > +#define IVPU_MMU_STE_1_STRW_NSEL1 0ul > +#define IVPU_MMU_STE_1_CONT GENMASK_ULL(16, 13) > +#define IVPU_MMU_STE_1_STRW GENMASK_ULL(31, 30) > +#define IVPU_MMU_STE_1_PRIVCFG GENMASK_ULL(49, 48) > +#define IVPU_MMU_STE_1_PRIVCFG_UNPRIV 2ul > +#define IVPU_MMU_STE_1_INSTCFG GENMASK_ULL(51, 50) > +#define IVPU_MMU_STE_1_INSTCFG_DATA 2ul > +#define IVPU_MMU_STE_1_MEV BIT(19) > +#define IVPU_MMU_STE_1_S1STALLD BIT(27) > +#define IVPU_MMU_STE_1_S1C_CACHE_NC 0ul > +#define IVPU_MMU_STE_1_S1C_CACHE_WBRA 1ul > +#define IVPU_MMU_STE_1_S1C_CACHE_WT 2ul > +#define IVPU_MMU_STE_1_S1C_CACHE_WB 3ul > +#define IVPU_MMU_STE_1_S1CIR GENMASK_ULL(3, 2) > +#define IVPU_MMU_STE_1_S1COR GENMASK_ULL(5, 4) > +#define IVPU_MMU_STE_1_S1CSH GENMASK_ULL(7, 6) > +#define IVPU_MMU_STE_1_S1DSS GENMASK_ULL(1, 0) > +#define IVPU_MMU_STE_1_S1DSS_TERMINATE 0x0 > + > +#define IVPU_MMU_REG_TIMEOUT_US (10 * USEC_PER_MSEC) > +#define IVPU_MMU_QUEUE_TIMEOUT_US (100 * USEC_PER_MSEC) > + > +#define IVPU_MMU_GERROR_ERR_MASK ((REG_FLD(MTL_VPU_HOST_MMU_GERROR, CMDQ)) | \ > + (REG_FLD(MTL_VPU_HOST_MMU_GERROR, EVTQ_ABT)) | \ > + (REG_FLD(MTL_VPU_HOST_MMU_GERROR, PRIQ_ABT)) | \ > + (REG_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_CMDQ_ABT)) | \ > + (REG_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_EVTQ_ABT)) | \ > + (REG_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_PRIQ_ABT)) | \ > + (REG_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_ABT))) > + > 
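(A side note for other readers of this patch: the IVPU_MMU_Q_* macros above follow the usual SMMUv3 convention where prod/cons counters run modulo twice the queue size, so equal slot indices with unequal counters mean "full" rather than "empty". A minimal sketch of the tests this encodes, reusing the macros from the patch - not code I'm asking to add:

static bool ivpu_mmu_q_empty(u32 prod, u32 cons)
{
        return prod == cons;
}

static bool ivpu_mmu_q_full(u32 prod, u32 cons)
{
        /* Same slot index but different counters -> IVPU_MMU_Q_COUNT entries apart */
        return IVPU_MMU_Q_IDX(prod) == IVPU_MMU_Q_IDX(cons) && prod != cons;
}

The patch itself does these checks via CIRC_CNT()/CIRC_SPACE() on the masked indices further down.)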
+static char *ivpu_mmu_event_to_str(u32 cmd) > +{ > + switch (cmd) { > + case IVPU_MMU_EVT_F_UUT: > + return "Unsupported Upstream Transaction"; > + case IVPU_MMU_EVT_C_BAD_STREAMID: > + return "Transaction StreamID out of range"; > + case IVPU_MMU_EVT_F_STE_FETCH: > + return "Fetch of STE caused external abort"; > + case IVPU_MMU_EVT_C_BAD_STE: > + return "Used STE invalid"; > + case IVPU_MMU_EVT_F_BAD_ATS_TREQ: > + return "Address Request disallowed for a StreamID"; > + case IVPU_MMU_EVT_F_STREAM_DISABLED: > + return "Transaction marks non-substream disabled"; > + case IVPU_MMU_EVT_F_TRANSL_FORBIDDEN: > + return "MMU bypass is disallowed for this StreamID"; > + case IVPU_MMU_EVT_C_BAD_SUBSTREAMID: > + return "Invalid StreamID"; > + case IVPU_MMU_EVT_F_CD_FETCH: > + return "Fetch of CD caused external abort"; > + case IVPU_MMU_EVT_C_BAD_CD: > + return "Fetched CD invalid"; > + case IVPU_MMU_EVT_F_WALK_EABT: > + return " An external abort occurred fetching a TLB"; > + case IVPU_MMU_EVT_F_TRANSLATION: > + return "Translation fault"; > + case IVPU_MMU_EVT_F_ADDR_SIZE: > + return " Output address caused address size fault"; > + case IVPU_MMU_EVT_F_ACCESS: > + return "Access flag fault"; > + case IVPU_MMU_EVT_F_PERMISSION: > + return "Permission fault occurred on page access"; > + case IVPU_MMU_EVT_F_TLB_CONFLICT: > + return "A TLB conflict"; > + case IVPU_MMU_EVT_F_CFG_CONFLICT: > + return "A configuration cache conflict"; > + case IVPU_MMU_EVT_E_PAGE_REQUEST: > + return "Page request hint from a client device"; > + case IVPU_MMU_EVT_F_VMS_FETCH: > + return "Fetch of VMS caused external abort"; > + default: > + return "Unknown CMDQ command"; > + } > +} > + > +static int ivpu_mmu_config_check(struct ivpu_device *vdev) > +{ > + u32 val_ref; > + u32 val; > + > + if (ivpu_is_simics(vdev)) > + val_ref = IVPU_MMU_IDR0_REF_SIMICS; > + else > + val_ref = IVPU_MMU_IDR0_REF; > + > + val = REGV_RD32(MTL_VPU_HOST_MMU_IDR0); > + if (val != val_ref) > + ivpu_err(vdev, "IDR0 0x%x != IDR0_REF 0x%x\n", val, val_ref); What's the meaning of printing an error if this function always returns 0 ? Do you count on the user to look at dmesg ? > + > + val = REGV_RD32(MTL_VPU_HOST_MMU_IDR1); > + if (val != IVPU_MMU_IDR1_REF) > + ivpu_warn(vdev, "IDR1 0x%x != IDR1_REF 0x%x\n", val, IVPU_MMU_IDR1_REF); > + > + val = REGV_RD32(MTL_VPU_HOST_MMU_IDR3); > + if (val != IVPU_MMU_IDR3_REF) > + ivpu_warn(vdev, "IDR3 0x%x != IDR3_REF 0x%x\n", val, IVPU_MMU_IDR3_REF); > + > + if (ivpu_is_simics(vdev)) > + val_ref = IVPU_MMU_IDR5_REF_SIMICS; > + else if (ivpu_is_fpga(vdev)) > + val_ref = IVPU_MMU_IDR5_REF_FPGA; > + else > + val_ref = IVPU_MMU_IDR5_REF; > + > + val = REGV_RD32(MTL_VPU_HOST_MMU_IDR5); > + if (val != val_ref) > + ivpu_dbg(vdev, MMU, "IDR5 0x%x != IDR5_REF 0x%x\n", val, val_ref); > + > + return 0; Why not define this function as void if it always returns 0 ? 
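i.e. something like this (just a sketch of the first check, same pattern for the rest):

static void ivpu_mmu_config_check(struct ivpu_device *vdev)
{
        u32 val_ref;
        u32 val;

        if (ivpu_is_simics(vdev))
                val_ref = IVPU_MMU_IDR0_REF_SIMICS;
        else
                val_ref = IVPU_MMU_IDR0_REF;

        val = REGV_RD32(MTL_VPU_HOST_MMU_IDR0);
        if (val != val_ref)
                ivpu_err(vdev, "IDR0 0x%x != IDR0_REF 0x%x\n", val, val_ref);

        /* ... IDR1, IDR3 and IDR5 checks unchanged ... */
}

Then the caller in ivpu_mmu_init() doesn't need the ret check at all.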
> +} > + > +static int ivpu_mmu_cdtab_alloc(struct ivpu_device *vdev) > +{ > + struct ivpu_mmu_info *mmu = vdev->mmu; > + struct ivpu_mmu_cdtab *cdtab = &mmu->cdtab; > + size_t size = IVPU_MMU_CDTAB_ENT_COUNT * IVPU_MMU_CDTAB_ENT_SIZE; > + > + cdtab->base = dmam_alloc_coherent(vdev->drm.dev, size, &cdtab->dma, GFP_KERNEL); > + if (!cdtab->base) > + return -ENOMEM; > + > + ivpu_dbg(vdev, MMU, "CDTAB alloc: dma=%pad size=%zu\n", &cdtab->dma, size); > + > + return 0; > +} > + > +static int ivpu_mmu_strtab_alloc(struct ivpu_device *vdev) > +{ > + struct ivpu_mmu_info *mmu = vdev->mmu; > + struct ivpu_mmu_strtab *strtab = &mmu->strtab; > + size_t size = IVPU_MMU_STRTAB_ENT_COUNT * IVPU_MMU_STRTAB_ENT_SIZE; > + > + strtab->base = dmam_alloc_coherent(vdev->drm.dev, size, &strtab->dma, GFP_KERNEL); > + if (!strtab->base) > + return -ENOMEM; > + > + strtab->base_cfg = IVPU_MMU_STRTAB_CFG; > + strtab->dma_q = IVPU_MMU_STRTAB_BASE_RA; > + strtab->dma_q |= strtab->dma & IVPU_MMU_STRTAB_BASE_ADDR_MASK; > + > + ivpu_dbg(vdev, MMU, "STRTAB alloc: dma=%pad dma_q=%pad size=%zu\n", > + &strtab->dma, &strtab->dma_q, size); > + > + return 0; > +} > + > +static int ivpu_mmu_cmdq_alloc(struct ivpu_device *vdev) > +{ > + struct ivpu_mmu_info *mmu = vdev->mmu; > + struct ivpu_mmu_queue *q = &mmu->cmdq; > + > + q->base = dmam_alloc_coherent(vdev->drm.dev, IVPU_MMU_CMDQ_SIZE, &q->dma, GFP_KERNEL); > + if (!q->base) > + return -ENOMEM; > + > + q->dma_q = IVPU_MMU_Q_BASE_RWA; > + q->dma_q |= q->dma & IVPU_MMU_Q_BASE_ADDR_MASK; > + q->dma_q |= IVPU_MMU_Q_COUNT_LOG2; > + > + ivpu_dbg(vdev, MMU, "CMDQ alloc: dma=%pad dma_q=%pad size=%u\n", > + &q->dma, &q->dma_q, IVPU_MMU_CMDQ_SIZE); > + > + return 0; > +} > + > +static int ivpu_mmu_evtq_alloc(struct ivpu_device *vdev) > +{ > + struct ivpu_mmu_info *mmu = vdev->mmu; > + struct ivpu_mmu_queue *q = &mmu->evtq; > + > + q->base = dmam_alloc_coherent(vdev->drm.dev, IVPU_MMU_EVTQ_SIZE, &q->dma, GFP_KERNEL); > + if (!q->base) > + return -ENOMEM; > + > + q->dma_q = IVPU_MMU_Q_BASE_RWA; > + q->dma_q |= q->dma & IVPU_MMU_Q_BASE_ADDR_MASK; > + q->dma_q |= IVPU_MMU_Q_COUNT_LOG2; > + > + ivpu_dbg(vdev, MMU, "EVTQ alloc: dma=%pad dma_q=%pad size=%u\n", > + &q->dma, &q->dma_q, IVPU_MMU_EVTQ_SIZE); > + > + return 0; > +} > + > +static int ivpu_mmu_structs_alloc(struct ivpu_device *vdev) > +{ > + int ret; > + > + ret = ivpu_mmu_cdtab_alloc(vdev); > + if (ret) { > + ivpu_err(vdev, "Failed to allocate cdtab: %d\n", ret); > + return ret; > + } > + > + ret = ivpu_mmu_strtab_alloc(vdev); > + if (ret) { > + ivpu_err(vdev, "Failed to allocate strtab: %d\n", ret); > + return ret; > + } > + > + ret = ivpu_mmu_cmdq_alloc(vdev); > + if (ret) { > + ivpu_err(vdev, "Failed to allocate cmdq: %d\n", ret); > + return ret; > + } > + > + ret = ivpu_mmu_evtq_alloc(vdev); > + if (ret) > + ivpu_err(vdev, "Failed to allocate evtq: %d\n", ret); > + > + return ret; > +} > + > +static int ivpu_mmu_reg_write(struct ivpu_device *vdev, u32 reg, u32 val) > +{ > + u32 reg_ack = reg + 4; /* ACK register is 4B after base register */ > + u32 val_ack; > + int ret; > + > + REGV_WR32(reg, val); > + > + ret = REGV_POLL(reg_ack, val_ack, (val == val_ack), IVPU_MMU_REG_TIMEOUT_US); > + if (ret) > + ivpu_err(vdev, "Failed to write register 0x%x\n", reg); > + > + return ret; > +} > + > +static int ivpu_mmu_irqs_setup(struct ivpu_device *vdev) > +{ > + u32 irq_ctrl = IVPU_MMU_IRQ_EVTQ_EN | IVPU_MMU_IRQ_GERROR_EN; > + int ret; > + > + ret = ivpu_mmu_reg_write(vdev, MTL_VPU_HOST_MMU_IRQ_CTRL, 0); > + if (ret) > + return ret; > + > 
+ return ivpu_mmu_reg_write(vdev, MTL_VPU_HOST_MMU_IRQ_CTRL, irq_ctrl); > +} > + > +static int ivpu_mmu_cmdq_wait_for_cons(struct ivpu_device *vdev) > +{ > + struct ivpu_mmu_queue *cmdq = &vdev->mmu->cmdq; > + > + return REGV_POLL(MTL_VPU_HOST_MMU_CMDQ_CONS, cmdq->cons, (cmdq->prod == cmdq->cons), > + IVPU_MMU_QUEUE_TIMEOUT_US); > +} > + > +static int ivpu_mmu_cmdq_cmd_write(struct ivpu_device *vdev, const char *name, u64 data0, u64 data1) > +{ > + struct ivpu_mmu_queue *q = &vdev->mmu->cmdq; > + u64 *queue_buffer = q->base; > + int idx = IVPU_MMU_Q_IDX(q->prod) * (IVPU_MMU_CMDQ_CMD_SIZE / sizeof(*queue_buffer)); > + > + if (!CIRC_SPACE(IVPU_MMU_Q_IDX(q->prod), IVPU_MMU_Q_IDX(q->cons), IVPU_MMU_Q_COUNT)) { > + ivpu_err(vdev, "Failed to write MMU CMD %s\n", name); > + return -EBUSY; > + } > + > + queue_buffer[idx] = data0; > + queue_buffer[idx + 1] = data1; > + q->prod = (q->prod + 1) & IVPU_MMU_Q_WRAP_MASK; > + > + ivpu_dbg(vdev, MMU, "CMD write: %s data: 0x%llx 0x%llx\n", name, data0, data1); > + > + return 0; > +} > + > +static int ivpu_mmu_cmdq_sync(struct ivpu_device *vdev) > +{ > + struct ivpu_mmu_queue *q = &vdev->mmu->cmdq; > + u64 val; > + int ret; > + > + val = FIELD_PREP(IVPU_MMU_CMD_OPCODE, CMD_SYNC) | > + FIELD_PREP(IVPU_MMU_CMD_SYNC_0_CS, 0x2) | > + FIELD_PREP(IVPU_MMU_CMD_SYNC_0_MSH, 0x3) | > + FIELD_PREP(IVPU_MMU_CMD_SYNC_0_MSI_ATTR, 0xf); > + > + ret = ivpu_mmu_cmdq_cmd_write(vdev, "SYNC", val, 0); > + if (ret) > + return ret; > + > + clflush_cache_range(q->base, IVPU_MMU_CMDQ_SIZE); Why do you need this call ? Not just here, in all the places in the driver. This is not a function call that is commonly seen in PCI drivers... > + REGV_WR32(MTL_VPU_HOST_MMU_CMDQ_PROD, q->prod); > + > + ret = ivpu_mmu_cmdq_wait_for_cons(vdev); > + if (ret) > + ivpu_err(vdev, "Timed out waiting for consumer: %d\n", ret); > + > + return ret; > +} > + > +static int ivpu_mmu_cmdq_write_cfgi_all(struct ivpu_device *vdev) > +{ > + u64 data0 = FIELD_PREP(IVPU_MMU_CMD_OPCODE, CMD_CFGI_ALL); > + u64 data1 = FIELD_PREP(IVPU_MMU_CMD_CFGI_1_RANGE, 0x1f); > + > + return ivpu_mmu_cmdq_cmd_write(vdev, "CFGI_ALL", data0, data1); > +} > + > +static int ivpu_mmu_cmdq_write_tlbi_nh_asid(struct ivpu_device *vdev, u16 ssid) > +{ > + u64 val = FIELD_PREP(IVPU_MMU_CMD_OPCODE, CMD_TLBI_NH_ASID) | > + FIELD_PREP(IVPU_MMU_CMD_TLBI_0_ASID, ssid); > + > + return ivpu_mmu_cmdq_cmd_write(vdev, "TLBI_NH_ASID", val, 0); > +} > + > +static int ivpu_mmu_cmdq_write_tlbi_nsnh_all(struct ivpu_device *vdev) > +{ > + u64 val = FIELD_PREP(IVPU_MMU_CMD_OPCODE, CMD_TLBI_NSNH_ALL); > + > + return ivpu_mmu_cmdq_cmd_write(vdev, "TLBI_NSNH_ALL", val, 0); > +} > + > +static int ivpu_mmu_reset(struct ivpu_device *vdev) > +{ > + struct ivpu_mmu_info *mmu = vdev->mmu; > + u32 val; > + int ret; > + > + memset(mmu->cmdq.base, 0, IVPU_MMU_CMDQ_SIZE); > + clflush_cache_range(mmu->cmdq.base, IVPU_MMU_CMDQ_SIZE); > + mmu->cmdq.prod = 0; > + mmu->cmdq.cons = 0; > + > + memset(mmu->evtq.base, 0, IVPU_MMU_EVTQ_SIZE); > + clflush_cache_range(mmu->evtq.base, IVPU_MMU_EVTQ_SIZE); > + mmu->evtq.prod = 0; > + mmu->evtq.cons = 0; > + > + ret = ivpu_mmu_reg_write(vdev, MTL_VPU_HOST_MMU_CR0, 0); > + if (ret) > + return ret; > + > + val = FIELD_PREP(IVPU_MMU_CR1_TABLE_SH, IVPU_MMU_SH_ISH) | > + FIELD_PREP(IVPU_MMU_CR1_TABLE_OC, IVPU_MMU_CACHE_WB) | > + FIELD_PREP(IVPU_MMU_CR1_TABLE_IC, IVPU_MMU_CACHE_WB) | > + FIELD_PREP(IVPU_MMU_CR1_QUEUE_SH, IVPU_MMU_SH_ISH) | > + FIELD_PREP(IVPU_MMU_CR1_QUEUE_OC, IVPU_MMU_CACHE_WB) | > + FIELD_PREP(IVPU_MMU_CR1_QUEUE_IC, 
IVPU_MMU_CACHE_WB); > + REGV_WR32(MTL_VPU_HOST_MMU_CR1, val); > + > + REGV_WR64(MTL_VPU_HOST_MMU_STRTAB_BASE, mmu->strtab.dma_q); > + REGV_WR32(MTL_VPU_HOST_MMU_STRTAB_BASE_CFG, mmu->strtab.base_cfg); > + > + REGV_WR64(MTL_VPU_HOST_MMU_CMDQ_BASE, mmu->cmdq.dma_q); > + REGV_WR32(MTL_VPU_HOST_MMU_CMDQ_PROD, 0); > + REGV_WR32(MTL_VPU_HOST_MMU_CMDQ_CONS, 0); > + > + val = IVPU_MMU_CR0_CMDQEN; > + ret = ivpu_mmu_reg_write(vdev, MTL_VPU_HOST_MMU_CR0, val); > + if (ret) > + return ret; > + > + ret = ivpu_mmu_cmdq_write_cfgi_all(vdev); > + if (ret) > + return ret; > + > + ret = ivpu_mmu_cmdq_write_tlbi_nsnh_all(vdev); > + if (ret) > + return ret; > + > + ret = ivpu_mmu_cmdq_sync(vdev); > + if (ret) > + return ret; > + > + REGV_WR64(MTL_VPU_HOST_MMU_EVTQ_BASE, mmu->evtq.dma_q); > + REGV_WR32(MTL_VPU_HOST_MMU_EVTQ_PROD_SEC, 0); > + REGV_WR32(MTL_VPU_HOST_MMU_EVTQ_CONS_SEC, 0); > + > + val |= IVPU_MMU_CR0_EVTQEN; > + ret = ivpu_mmu_reg_write(vdev, MTL_VPU_HOST_MMU_CR0, val); > + if (ret) > + return ret; > + > + val |= IVPU_MMU_CR0_ATSCHK; > + ret = ivpu_mmu_reg_write(vdev, MTL_VPU_HOST_MMU_CR0, val); > + if (ret) > + return ret; > + > + ret = ivpu_mmu_irqs_setup(vdev); > + if (ret) > + return ret; > + > + val |= IVPU_MMU_CR0_SMMUEN; > + return ivpu_mmu_reg_write(vdev, MTL_VPU_HOST_MMU_CR0, val); > +} > + > +static void ivpu_mmu_strtab_link_cd(struct ivpu_device *vdev, u32 sid) > +{ > + struct ivpu_mmu_info *mmu = vdev->mmu; > + struct ivpu_mmu_strtab *strtab = &mmu->strtab; > + struct ivpu_mmu_cdtab *cdtab = &mmu->cdtab; > + u64 *entry = strtab->base + (sid * IVPU_MMU_STRTAB_ENT_SIZE); > + u64 str[2]; > + > + str[0] = FIELD_PREP(IVPU_MMU_STE_0_CFG, IVPU_MMU_STE_0_CFG_S1_TRANS) | > + FIELD_PREP(IVPU_MMU_STE_0_S1CDMAX, IVPU_MMU_CDTAB_ENT_COUNT_LOG2) | > + FIELD_PREP(IVPU_MMU_STE_0_S1FMT, IVPU_MMU_STE_0_S1FMT_LINEAR) | > + IVPU_MMU_STE_0_V | > + (cdtab->dma & IVPU_MMU_STE_0_S1CTXPTR_MASK); > + > + str[1] = FIELD_PREP(IVPU_MMU_STE_1_S1DSS, IVPU_MMU_STE_1_S1DSS_TERMINATE) | > + FIELD_PREP(IVPU_MMU_STE_1_S1CIR, IVPU_MMU_STE_1_S1C_CACHE_NC) | > + FIELD_PREP(IVPU_MMU_STE_1_S1COR, IVPU_MMU_STE_1_S1C_CACHE_NC) | > + FIELD_PREP(IVPU_MMU_STE_1_S1CSH, IVPU_MMU_SH_NSH) | > + FIELD_PREP(IVPU_MMU_STE_1_PRIVCFG, IVPU_MMU_STE_1_PRIVCFG_UNPRIV) | > + FIELD_PREP(IVPU_MMU_STE_1_INSTCFG, IVPU_MMU_STE_1_INSTCFG_DATA) | > + FIELD_PREP(IVPU_MMU_STE_1_STRW, IVPU_MMU_STE_1_STRW_NSEL1) | > + FIELD_PREP(IVPU_MMU_STE_1_CONT, IVPU_MMU_STRTAB_CFG_LOG2SIZE) | > + IVPU_MMU_STE_1_MEV | > + IVPU_MMU_STE_1_S1STALLD; > + > + WRITE_ONCE(entry[1], str[1]); > + WRITE_ONCE(entry[0], str[0]); > + > + clflush_cache_range(entry, IVPU_MMU_STRTAB_ENT_SIZE); > + > + ivpu_dbg(vdev, MMU, "STRTAB write entry (SSID=%u): 0x%llx, 0x%llx\n", sid, str[0], str[1]); > +} > + > +static int ivpu_mmu_strtab_init(struct ivpu_device *vdev) > +{ > + ivpu_mmu_strtab_link_cd(vdev, IVPU_MMU_STREAM_ID0); > + ivpu_mmu_strtab_link_cd(vdev, IVPU_MMU_STREAM_ID3); > + > + return 0; > +} > + > +int ivpu_mmu_invalidate_tlb(struct ivpu_device *vdev, u16 ssid) > +{ > + struct ivpu_mmu_info *mmu = vdev->mmu; > + int ret; > + > + ret = mutex_lock_interruptible(&mmu->lock); > + if (ret) > + return ret; > + > + if (!mmu->on) { > + ret = 0; > + goto unlock; > + } Is this check really necessary ? Or is it for debug/bringup cases ? Excluding debug/bringup, I would imagine that the code should never reach tlb invalidation if MMU is not enabled. Also, from using mutex_lock_interruptible I infer that we only get to this function within a process context. 
And afaics, mmu enable/disable happens on device init/fini, where no user process exists... > + > + ret = ivpu_mmu_cmdq_write_tlbi_nh_asid(vdev, ssid); > + if (ret) > + goto unlock; > + > + ret = ivpu_mmu_cmdq_sync(vdev); > +unlock: > + mutex_unlock(&mmu->lock); > + return ret; > +} > + > +static int ivpu_mmu_cd_add(struct ivpu_device *vdev, u32 ssid, u64 cd_dma) > +{ > + struct ivpu_mmu_info *mmu = vdev->mmu; > + struct ivpu_mmu_cdtab *cdtab = &mmu->cdtab; > + u64 *entry; > + u64 cd[4]; > + int ret; > + > + if (ssid > IVPU_MMU_CDTAB_ENT_COUNT) > + return -EINVAL; > + > + ret = mutex_lock_interruptible(&mmu->lock); Why is this entire function protected by the mmu lock ? afaics, you prepare a context descriptor in system memory; there is a single descriptor per context, and every context has its own location inside cdtab->base (based on the ssid). Which concurrency are you protecting against ? And if there is no concurrency, I would expect to take the lock only before calling ivpu_mmu_cmdq_write_cfgi_all(). > + if (ret) > + return ret; > + > + entry = cdtab->base + (ssid * IVPU_MMU_CDTAB_ENT_SIZE); > + > + if (cd_dma != 0) { > + cd[0] = FIELD_PREP(IVPU_MMU_CD_0_TCR_T0SZ, 26) | > + FIELD_PREP(IVPU_MMU_CD_0_TCR_TG0, 0) | > + FIELD_PREP(IVPU_MMU_CD_0_TCR_IRGN0, 0) | > + FIELD_PREP(IVPU_MMU_CD_0_TCR_ORGN0, 0) | > + FIELD_PREP(IVPU_MMU_CD_0_TCR_SH0, 0) | > + FIELD_PREP(IVPU_MMU_CD_0_TCR_IPS, 3) | > + FIELD_PREP(IVPU_MMU_CD_0_ASID, ssid) | > + IVPU_MMU_CD_0_TCR_EPD1 | > + IVPU_MMU_CD_0_AA64 | > + IVPU_MMU_CD_0_R | > + IVPU_MMU_CD_0_A | > + IVPU_MMU_CD_0_ASET | > + IVPU_MMU_CD_0_V; > + cd[1] = cd_dma & IVPU_MMU_CD_1_TTB0_MASK; > + cd[2] = 0; > + cd[3] = 0x0000000000007444; > + } else { > + memset(cd, 0, sizeof(cd)); > + } > + > + WRITE_ONCE(entry[1], cd[1]); > + WRITE_ONCE(entry[2], cd[2]); > + WRITE_ONCE(entry[3], cd[3]); > + WRITE_ONCE(entry[0], cd[0]); > + > + clflush_cache_range(entry, IVPU_MMU_CDTAB_ENT_SIZE); > + > + ivpu_dbg(vdev, MMU, "CDTAB %s entry (SSID=%u, dma=%pad): 0x%llx, 0x%llx, 0x%llx, 0x%llx\n", > + cd_dma ? "write" : "clear", ssid, &cd_dma, cd[0], cd[1], cd[2], cd[3]); > + > + if (!mmu->on) { > + ret = 0; > + goto unlock; > + } > + > + ret = ivpu_mmu_cmdq_write_cfgi_all(vdev); > + if (ret) > + goto unlock; > + > + ret = ivpu_mmu_cmdq_sync(vdev); > +unlock: > + mutex_unlock(&mmu->lock); > + return ret; > +} > + > +static int ivpu_mmu_cd_add_gbl(struct ivpu_device *vdev) > +{ > + int ret; > + > + ret = ivpu_mmu_cd_add(vdev, 0, vdev->gctx.pgtable.pgd_dma); > + if (ret) > + ivpu_err(vdev, "Failed to add global CD entry: %d\n", ret); > + > + return ret; > +} > + > +static int ivpu_mmu_cd_add_user(struct ivpu_device *vdev, u32 ssid, dma_addr_t cd_dma) > +{ > + int ret; > + > + if (ssid == 0) { > + ivpu_err(vdev, "Invalid SSID: %u\n", ssid); > + return -EINVAL; > + } > + > + ret = ivpu_mmu_cd_add(vdev, ssid, cd_dma); > + if (ret) > + ivpu_err(vdev, "Failed to add CD entry SSID=%u: %d\n", ssid, ret); > + > + return ret; > +} > + > +int ivpu_mmu_init(struct ivpu_device *vdev) > +{ > + struct ivpu_mmu_info *mmu = vdev->mmu; > + int ret; > + > + ivpu_dbg(vdev, MMU, "Init..\n"); > + > + drmm_mutex_init(&vdev->drm, &mmu->lock); > + > + ret = ivpu_mmu_config_check(vdev); > + if (ret) > + return ret; See my comments in ivpu_mmu_config_check(). 
This check is useless > + > + ret = ivpu_mmu_structs_alloc(vdev); > + if (ret) > + return ret; > + > + ret = ivpu_mmu_strtab_init(vdev); > + if (ret) { > + ivpu_err(vdev, "Failed to initialize strtab: %d\n", ret); > + return ret; > + } > + > + ret = ivpu_mmu_cd_add_gbl(vdev); > + if (ret) { > + ivpu_err(vdev, "Failed to initialize strtab: %d\n", ret); > + return ret; > + } > + > + ret = ivpu_mmu_enable(vdev); > + if (ret) { > + ivpu_err(vdev, "Failed to resume MMU: %d\n", ret); > + return ret; > + } > + > + ivpu_dbg(vdev, MMU, "Init done\n"); > + > + return 0; > +} > + > +int ivpu_mmu_enable(struct ivpu_device *vdev) > +{ > + struct ivpu_mmu_info *mmu = vdev->mmu; > + int ret; > + > + mutex_lock(&mmu->lock); > + > + mmu->on = true; > + > + ret = ivpu_mmu_reset(vdev); > + if (ret) { > + ivpu_err(vdev, "Failed to reset MMU: %d\n", ret); > + goto err; > + } > + > + ret = ivpu_mmu_cmdq_write_cfgi_all(vdev); > + if (ret) > + goto err; > + > + ret = ivpu_mmu_cmdq_write_tlbi_nsnh_all(vdev); > + if (ret) > + goto err; > + > + ret = ivpu_mmu_cmdq_sync(vdev); > + if (ret) > + goto err; > + > + mutex_unlock(&mmu->lock); > + > + return 0; > +err: > + mmu->on = false; > + mutex_unlock(&mmu->lock); > + return ret; > +} > + > +void ivpu_mmu_disable(struct ivpu_device *vdev) > +{ > + struct ivpu_mmu_info *mmu = vdev->mmu; > + > + mutex_lock(&mmu->lock); > + mmu->on = false; > + mutex_unlock(&mmu->lock); > +} > + > +static void ivpu_mmu_dump_event(struct ivpu_device *vdev, u32 *event) > +{ > + u32 ssid = FIELD_GET(IVPU_MMU_EVT_SSID_MASK, event[0]); > + u32 op = FIELD_GET(IVPU_MMU_EVT_OP_MASK, event[0]); > + u64 fetch_addr = ((u64)event[7]) << 32 | event[6]; > + u64 in_addr = ((u64)event[5]) << 32 | event[4]; > + u32 sid = event[1]; > + > + ivpu_err(vdev, "MMU EVTQ: 0x%x (%s) SSID: %d SID: %d, e[2] %08x, e[3] %08x, in addr: 0x%llx, fetch addr: 0x%llx\n", > + op, ivpu_mmu_event_to_str(op), ssid, sid, event[2], event[3], in_addr, fetch_addr); > +} > + > +static u32 *ivpu_mmu_get_event(struct ivpu_device *vdev) > +{ > + struct ivpu_mmu_queue *evtq = &vdev->mmu->evtq; > + u32 idx = IVPU_MMU_Q_IDX(evtq->cons); > + u32 *evt = evtq->base + (idx * IVPU_MMU_EVTQ_CMD_SIZE); > + > + evtq->prod = REGV_RD32(MTL_VPU_HOST_MMU_EVTQ_PROD_SEC); > + if (!CIRC_CNT(IVPU_MMU_Q_IDX(evtq->prod), IVPU_MMU_Q_IDX(evtq->cons), IVPU_MMU_Q_COUNT)) > + return NULL; > + > + clflush_cache_range(evt, IVPU_MMU_EVTQ_CMD_SIZE); > + > + evtq->cons = (evtq->cons + 1) & IVPU_MMU_Q_WRAP_MASK; > + REGV_WR32(MTL_VPU_HOST_MMU_EVTQ_CONS_SEC, evtq->cons); > + > + return evt; > +} > + > +void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev) > +{ > + u32 *event; > + u32 ssid; > + > + ivpu_dbg(vdev, IRQ, "MMU event queue\n"); > + > + while ((event = ivpu_mmu_get_event(vdev)) != NULL) > + ivpu_mmu_dump_event(vdev, event); This is done in irq context, correct ? Is there some protection against endless (or very large number) stream of events (can be real thing or just f/w bug) ? 
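Unless I missed it, IVPU_MMU_EVTS_MAX is defined above but never used. One option - just a sketch, and assuming the event queue interrupt fires again while the queue stays non-empty - would be to use it to bound the loop and leave the rest for the next interrupt:

        u32 *event;
        u32 cnt = 0;

        /* Drain at most IVPU_MMU_EVTS_MAX events per interrupt */
        while (cnt++ < IVPU_MMU_EVTS_MAX && (event = ivpu_mmu_get_event(vdev)) != NULL)
                ivpu_mmu_dump_event(vdev, event);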
> +} > + > +void ivpu_mmu_irq_gerr_handler(struct ivpu_device *vdev) > +{ > + u32 gerror_val, gerrorn_val, active; > + > + ivpu_dbg(vdev, IRQ, "MMU error\n"); > + > + gerror_val = REGV_RD32(MTL_VPU_HOST_MMU_GERROR); > + gerrorn_val = REGV_RD32(MTL_VPU_HOST_MMU_GERRORN); > + > + active = gerror_val ^ gerrorn_val; > + if (!(active & IVPU_MMU_GERROR_ERR_MASK)) > + return; > + > + if (REG_TEST_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_ABT, active)) > + ivpu_warn_ratelimited(vdev, "MMU MSI ABT write aborted\n"); > + > + if (REG_TEST_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_PRIQ_ABT, active)) > + ivpu_warn_ratelimited(vdev, "MMU PRIQ MSI ABT write aborted\n"); > + > + if (REG_TEST_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_EVTQ_ABT, active)) > + ivpu_warn_ratelimited(vdev, "MMU EVTQ MSI ABT write aborted\n"); > + > + if (REG_TEST_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_CMDQ_ABT, active)) > + ivpu_warn_ratelimited(vdev, "MMU CMDQ MSI ABT write aborted\n"); > + > + if (REG_TEST_FLD(MTL_VPU_HOST_MMU_GERROR, PRIQ_ABT, active)) > + ivpu_err_ratelimited(vdev, "MMU PRIQ write aborted\n"); > + > + if (REG_TEST_FLD(MTL_VPU_HOST_MMU_GERROR, EVTQ_ABT, active)) > + ivpu_err_ratelimited(vdev, "MMU EVTQ write aborted\n"); > + > + if (REG_TEST_FLD(MTL_VPU_HOST_MMU_GERROR, CMDQ, active)) > + ivpu_err_ratelimited(vdev, "MMU CMDQ write aborted\n"); > + > + REGV_WR32(MTL_VPU_HOST_MMU_GERRORN, gerror_val); > +} > + > +int ivpu_mmu_set_pgtable(struct ivpu_device *vdev, int ssid, struct ivpu_mmu_pgtable *pgtable) > +{ > + return ivpu_mmu_cd_add_user(vdev, ssid, pgtable->pgd_dma); > +} > + > +void ivpu_mmu_clear_pgtable(struct ivpu_device *vdev, int ssid) > +{ > + ivpu_mmu_cd_add_user(vdev, ssid, 0); /* 0 will clear CD entry */ > +} > diff --git a/drivers/accel/ivpu/ivpu_mmu.h b/drivers/accel/ivpu/ivpu_mmu.h > new file mode 100644 > index 000000000000..466d698c7142 > --- /dev/null > +++ b/drivers/accel/ivpu/ivpu_mmu.h > @@ -0,0 +1,50 @@ > +/* SPDX-License-Identifier: GPL-2.0-only */ > +/* > + * Copyright (C) 2020-2022 Intel Corporation > + */ > + > +#ifndef __IVPU_MMU_H__ > +#define __IVPU_MMU_H__ > + > +struct ivpu_device; > + > +struct ivpu_mmu_cdtab { > + void *base; > + dma_addr_t dma; > +}; > + > +struct ivpu_mmu_strtab { > + void *base; > + dma_addr_t dma; > + u64 dma_q; > + u32 base_cfg; > +}; > + > +struct ivpu_mmu_queue { > + void *base; > + dma_addr_t dma; > + u64 dma_q; > + u32 prod; > + u32 cons; > +}; > + > +struct ivpu_mmu_info { > + struct mutex lock; /* Protects cdtab, strtab, cmdq, on */ > + struct ivpu_mmu_cdtab cdtab; > + struct ivpu_mmu_strtab strtab; > + struct ivpu_mmu_queue cmdq; > + struct ivpu_mmu_queue evtq; > + bool on; > +}; > + > +int ivpu_mmu_init(struct ivpu_device *vdev); > +void ivpu_mmu_disable(struct ivpu_device *vdev); > +int ivpu_mmu_enable(struct ivpu_device *vdev); > +int ivpu_mmu_set_pgtable(struct ivpu_device *vdev, int ssid, struct ivpu_mmu_pgtable *pgtable); > +void ivpu_mmu_clear_pgtable(struct ivpu_device *vdev, int ssid); > +int ivpu_mmu_invalidate_tlb(struct ivpu_device *vdev, u16 ssid); > + > +void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev); > +void ivpu_mmu_irq_gerr_handler(struct ivpu_device *vdev); > + > +#endif /* __IVPU_MMU_H__ */ > diff --git a/drivers/accel/ivpu/ivpu_mmu_context.c b/drivers/accel/ivpu/ivpu_mmu_context.c > new file mode 100644 > index 000000000000..eb25e613bb90 > --- /dev/null > +++ b/drivers/accel/ivpu/ivpu_mmu_context.c > @@ -0,0 +1,385 @@ > +// SPDX-License-Identifier: GPL-2.0-only > +/* > + * Copyright (C) 2020-2022 Intel Corporation > + */ > + > +#include 
<linux/bitfield.h> > +#include <linux/highmem.h> > + > +#include "ivpu_drv.h" > +#include "ivpu_hw.h" > +#include "ivpu_mmu.h" > +#include "ivpu_mmu_context.h" > + > +#define IVPU_MMU_PGD_INDEX_MASK GENMASK(38, 30) > +#define IVPU_MMU_PMD_INDEX_MASK GENMASK(29, 21) > +#define IVPU_MMU_PTE_INDEX_MASK GENMASK(20, 12) > +#define IVPU_MMU_ENTRY_FLAGS_MASK GENMASK(11, 0) > +#define IVPU_MMU_ENTRY_FLAG_NG BIT(11) > +#define IVPU_MMU_ENTRY_FLAG_AF BIT(10) > +#define IVPU_MMU_ENTRY_FLAG_USER BIT(6) > +#define IVPU_MMU_ENTRY_FLAG_LLC_COHERENT BIT(2) > +#define IVPU_MMU_ENTRY_FLAG_TYPE_PAGE BIT(1) > +#define IVPU_MMU_ENTRY_FLAG_VALID BIT(0) > + > +#define IVPU_MMU_PAGE_SIZE SZ_4K > +#define IVPU_MMU_PTE_MAP_SIZE (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PAGE_SIZE) > +#define IVPU_MMU_PMD_MAP_SIZE (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PTE_MAP_SIZE) > +#define IVPU_MMU_PGTABLE_SIZE (IVPU_MMU_PGTABLE_ENTRIES * sizeof(u64)) > + > +#define IVPU_MMU_DUMMY_ADDRESS 0xdeadb000 > +#define IVPU_MMU_ENTRY_VALID (IVPU_MMU_ENTRY_FLAG_TYPE_PAGE | IVPU_MMU_ENTRY_FLAG_VALID) > +#define IVPU_MMU_ENTRY_INVALID (IVPU_MMU_DUMMY_ADDRESS & ~IVPU_MMU_ENTRY_FLAGS_MASK) > +#define IVPU_MMU_ENTRY_MAPPED (IVPU_MMU_ENTRY_FLAG_AF | IVPU_MMU_ENTRY_FLAG_USER | \ > + IVPU_MMU_ENTRY_FLAG_NG | IVPU_MMU_ENTRY_VALID) > + > +static int ivpu_mmu_pgtable_init(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable) > +{ > + dma_addr_t pgd_dma; > + u64 *pgd; > + > + pgd = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pgd_dma, GFP_KERNEL); > + if (!pgd) > + return -ENOMEM; > + > + pgtable->pgd = pgd; > + pgtable->pgd_dma = pgd_dma; > + > + return 0; > +} > + > +static void ivpu_mmu_pgtable_free(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable) > +{ > + int pgd_index, pmd_index; > + > + for (pgd_index = 0; pgd_index < IVPU_MMU_PGTABLE_ENTRIES; ++pgd_index) { > + u64 **pmd_entries = pgtable->pgd_cpu_entries[pgd_index]; > + u64 *pmd = pgtable->pgd_entries[pgd_index]; > + > + if (!pmd_entries) > + continue; > + > + for (pmd_index = 0; pmd_index < IVPU_MMU_PGTABLE_ENTRIES; ++pmd_index) { > + if (pmd_entries[pmd_index]) > + dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, > + pmd_entries[pmd_index], > + pmd[pmd_index] & ~IVPU_MMU_ENTRY_FLAGS_MASK); > + } > + > + kfree(pmd_entries); > + dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, pgtable->pgd_entries[pgd_index], > + pgtable->pgd[pgd_index] & ~IVPU_MMU_ENTRY_FLAGS_MASK); > + } > + > + dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, pgtable->pgd, > + pgtable->pgd_dma & ~IVPU_MMU_ENTRY_FLAGS_MASK); > +} > + > +static u64* > +ivpu_mmu_ensure_pmd(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable, u64 pgd_index) > +{ > + u64 **pmd_entries; > + dma_addr_t pmd_dma; > + u64 *pmd; > + > + if (pgtable->pgd_entries[pgd_index]) > + return pgtable->pgd_entries[pgd_index]; > + > + pmd = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pmd_dma, GFP_KERNEL); > + if (!pmd) > + return NULL; > + > + pmd_entries = kzalloc(IVPU_MMU_PGTABLE_SIZE, GFP_KERNEL); > + if (!pmd_entries) > + goto err_free_pgd; > + > + pgtable->pgd_entries[pgd_index] = pmd; > + pgtable->pgd_cpu_entries[pgd_index] = pmd_entries; > + pgtable->pgd[pgd_index] = pmd_dma | IVPU_MMU_ENTRY_VALID; > + > + return pmd; > + > +err_free_pgd: > + dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, pmd, pmd_dma); > + return NULL; > +} > + > +static u64* > +ivpu_mmu_ensure_pte(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable, > + int pgd_index, int pmd_index) > +{ > + dma_addr_t pte_dma; > + u64 *pte; > + > + if 
(pgtable->pgd_cpu_entries[pgd_index][pmd_index]) > + return pgtable->pgd_cpu_entries[pgd_index][pmd_index]; > + > + pte = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pte_dma, GFP_KERNEL); > + if (!pte) > + return NULL; > + > + pgtable->pgd_cpu_entries[pgd_index][pmd_index] = pte; > + pgtable->pgd_entries[pgd_index][pmd_index] = pte_dma | IVPU_MMU_ENTRY_VALID; > + > + return pte; > +} > + > +static int > +ivpu_mmu_context_map_page(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, > + u64 vpu_addr, dma_addr_t dma_addr, int prot) > +{ > + u64 *pte; > + int pgd_index = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr); > + int pmd_index = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr); > + int pte_index = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr); > + > + /* Allocate PMD - second level page table if needed */ > + if (!ivpu_mmu_ensure_pmd(vdev, &ctx->pgtable, pgd_index)) > + return -ENOMEM; > + > + /* Allocate PTE - third level page table if needed */ > + pte = ivpu_mmu_ensure_pte(vdev, &ctx->pgtable, pgd_index, pmd_index); > + if (!pte) > + return -ENOMEM; > + > + /* Update PTE - third level page table with DMA address */ > + pte[pte_index] = dma_addr | prot; > + > + return 0; > +} > + > +static void ivpu_mmu_context_unmap_page(struct ivpu_mmu_context *ctx, u64 vpu_addr) > +{ > + int pgd_index = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr); > + int pmd_index = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr); > + int pte_index = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr); > + > + /* Update PTE with dummy physical address and clear flags */ > + ctx->pgtable.pgd_cpu_entries[pgd_index][pmd_index][pte_index] = IVPU_MMU_ENTRY_INVALID; > +} > + > +static void > +ivpu_mmu_context_flush_page_tables(struct ivpu_mmu_context *ctx, u64 vpu_addr, size_t size) > +{ > + u64 end_addr = vpu_addr + size; > + u64 *pgd = ctx->pgtable.pgd; > + > + /* Align to PMD entry (2 MB) */ > + vpu_addr &= ~(IVPU_MMU_PTE_MAP_SIZE - 1); > + > + while (vpu_addr < end_addr) { > + int pgd_index = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr); > + u64 pmd_end = (pgd_index + 1) * (u64)IVPU_MMU_PMD_MAP_SIZE; > + u64 *pmd = ctx->pgtable.pgd_entries[pgd_index]; > + > + while (vpu_addr < end_addr && vpu_addr < pmd_end) { > + int pmd_index = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr); > + u64 *pte = ctx->pgtable.pgd_cpu_entries[pgd_index][pmd_index]; > + > + clflush_cache_range(pte, IVPU_MMU_PGTABLE_SIZE); > + vpu_addr += IVPU_MMU_PTE_MAP_SIZE; > + } > + clflush_cache_range(pmd, IVPU_MMU_PGTABLE_SIZE); > + } > + clflush_cache_range(pgd, IVPU_MMU_PGTABLE_SIZE); > +} > + > +static int > +ivpu_mmu_context_map_pages(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, > + u64 vpu_addr, dma_addr_t dma_addr, size_t size, int prot) > +{ > + while (size) { > + int ret = ivpu_mmu_context_map_page(vdev, ctx, vpu_addr, dma_addr, prot); > + > + if (ret) > + return ret; > + > + vpu_addr += IVPU_MMU_PAGE_SIZE; > + dma_addr += IVPU_MMU_PAGE_SIZE; > + size -= IVPU_MMU_PAGE_SIZE; > + } > + > + return 0; > +} > + > +static void ivpu_mmu_context_unmap_pages(struct ivpu_mmu_context *ctx, u64 vpu_addr, size_t size) > +{ > + while (size) { > + ivpu_mmu_context_unmap_page(ctx, vpu_addr); > + vpu_addr += IVPU_MMU_PAGE_SIZE; > + size -= IVPU_MMU_PAGE_SIZE; > + } > +} > + > +int > +ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, > + u64 vpu_addr, struct sg_table *sgt, bool llc_coherent) > +{ Hard to review these functions as the callers are not in this patch AND there is no documentation on the functions. 
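Even a short kernel-doc on the entry points would help. My guess at the intended semantics, based only on the signature and body - please correct:

/**
 * ivpu_mmu_context_map_sgt() - Map a DMA-mapped sg_table into a context
 * @vdev: VPU device
 * @ctx: MMU context to create the mapping in
 * @vpu_addr: page-aligned VPU virtual address to map at
 * @sgt: scatter-gather table holding the DMA addresses to map
 * @llc_coherent: whether to mark the mapping as LLC-coherent
 *
 * Return: 0 on success, negative errno on failure.
 */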
> + struct scatterlist *sg; > + int prot; > + int ret; > + u64 i; > + > + if (!IS_ALIGNED(vpu_addr, IVPU_MMU_PAGE_SIZE)) > + return -EINVAL; > + /* > + * VPU is only 32 bit, but DMA engine is 38 bit > + * Ranges < 2 GB are reserved for VPU internal registers > + * Limit range to 8 GB > + */ > + if (vpu_addr < SZ_2G || vpu_addr > SZ_8G) > + return -EINVAL; > + > + prot = IVPU_MMU_ENTRY_MAPPED; > + if (llc_coherent) > + prot |= IVPU_MMU_ENTRY_FLAG_LLC_COHERENT; > + > + mutex_lock(&ctx->lock); > + > + for_each_sgtable_dma_sg(sgt, sg, i) { > + u64 dma_addr = sg_dma_address(sg) - sg->offset; > + size_t size = sg_dma_len(sg) + sg->offset; > + > + ret = ivpu_mmu_context_map_pages(vdev, ctx, vpu_addr, dma_addr, size, prot); > + if (ret) { > + ivpu_err(vdev, "Failed to map context pages\n"); > + mutex_unlock(&ctx->lock); > + return ret; > + } > + ivpu_mmu_context_flush_page_tables(ctx, vpu_addr, size); > + vpu_addr += size; > + } > + > + mutex_unlock(&ctx->lock); > + > + ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id); > + if (ret) > + ivpu_err(vdev, "Failed to invalidate TLB for ctx %u: %d\n", ctx->id, ret); > + return ret; > +} > + > +void > +ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, > + u64 vpu_addr, struct sg_table *sgt) > +{ > + struct scatterlist *sg; > + int ret; > + u64 i; > + > + if (!IS_ALIGNED(vpu_addr, IVPU_MMU_PAGE_SIZE)) > + ivpu_warn(vdev, "Unaligned vpu_addr: 0x%llx\n", vpu_addr); > + > + mutex_lock(&ctx->lock); > + > + for_each_sgtable_dma_sg(sgt, sg, i) { > + size_t size = sg_dma_len(sg) + sg->offset; > + > + ivpu_mmu_context_unmap_pages(ctx, vpu_addr, size); > + ivpu_mmu_context_flush_page_tables(ctx, vpu_addr, size); > + vpu_addr += size; > + } > + > + mutex_unlock(&ctx->lock); > + > + ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id); > + if (ret) > + ivpu_warn(vdev, "Failed to invalidate TLB for ctx %u: %d\n", ctx->id, ret); > +} > + > +int > +ivpu_mmu_context_insert_node_locked(struct ivpu_mmu_context *ctx, > + const struct ivpu_addr_range *range, > + u64 size, struct drm_mm_node *node) > +{ > + lockdep_assert_held(&ctx->lock); > + > + return drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_PAGE_SIZE, > + 0, range->start, range->end, DRM_MM_INSERT_BEST); > +} > + > +void > +ivpu_mmu_context_remove_node_locked(struct ivpu_mmu_context *ctx, struct drm_mm_node *node) > +{ > + lockdep_assert_held(&ctx->lock); > + > + drm_mm_remove_node(node); > +} > + > +static int > +ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 context_id) > +{ > + u64 start, end; > + int ret; > + > + mutex_init(&ctx->lock); > + INIT_LIST_HEAD(&ctx->bo_list); > + > + ret = ivpu_mmu_pgtable_init(vdev, &ctx->pgtable); > + if (ret) > + return ret; > + > + if (!context_id) { > + start = vdev->hw->ranges.global_low.start; > + end = vdev->hw->ranges.global_high.end; > + } else { > + start = vdev->hw->ranges.user_low.start; > + end = vdev->hw->ranges.user_high.end; > + } > + > + drm_mm_init(&ctx->mm, start, end - start); > + ctx->id = context_id; > + > + return 0; > +} > + > +static void ivpu_mmu_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx) > +{ > + drm_WARN_ON(&vdev->drm, !ctx->pgtable.pgd); > + > + mutex_destroy(&ctx->lock); > + ivpu_mmu_pgtable_free(vdev, &ctx->pgtable); > + drm_mm_takedown(&ctx->mm); > +} > + > +int ivpu_mmu_global_context_init(struct ivpu_device *vdev) > +{ > + return ivpu_mmu_context_init(vdev, &vdev->gctx, IVPU_GLOBAL_CONTEXT_MMU_SSID); > +} > + > +void ivpu_mmu_global_context_fini(struct 
ivpu_device *vdev) > +{ > + return ivpu_mmu_context_fini(vdev, &vdev->gctx); > +} > + > +int ivpu_mmu_user_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 ctx_id) > +{ > + int ret; > + > + drm_WARN_ON(&vdev->drm, !ctx_id); > + > + ret = ivpu_mmu_context_init(vdev, ctx, ctx_id); > + if (ret) { > + ivpu_err(vdev, "Failed to initialize context: %d\n", ret); > + return ret; > + } > + > + ret = ivpu_mmu_set_pgtable(vdev, ctx_id, &ctx->pgtable); > + if (ret) { > + ivpu_err(vdev, "Failed to set page table: %d\n", ret); > + goto err_context_fini; > + } > + > + return 0; > + > +err_context_fini: > + ivpu_mmu_context_fini(vdev, ctx); > + return ret; > +} > + > +void ivpu_mmu_user_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx) > +{ > + drm_WARN_ON(&vdev->drm, !ctx->id); > + > + ivpu_mmu_clear_pgtable(vdev, ctx->id); > + ivpu_mmu_context_fini(vdev, ctx); > +} > diff --git a/drivers/accel/ivpu/ivpu_mmu_context.h b/drivers/accel/ivpu/ivpu_mmu_context.h > new file mode 100644 > index 000000000000..a358de65a30d > --- /dev/null > +++ b/drivers/accel/ivpu/ivpu_mmu_context.h > @@ -0,0 +1,49 @@ > +/* SPDX-License-Identifier: GPL-2.0-only */ > +/* > + * Copyright (C) 2020-2022 Intel Corporation > + */ > + > +#ifndef __IVPU_MMU_CONTEXT_H__ > +#define __IVPU_MMU_CONTEXT_H__ > + > +#include <drm/drm_mm.h> > + > +struct ivpu_device; > +struct ivpu_file_priv; > +struct ivpu_addr_range; > + > +#define IVPU_MMU_PGTABLE_ENTRIES 512 > + > +struct ivpu_mmu_pgtable { > + u64 **pgd_cpu_entries[IVPU_MMU_PGTABLE_ENTRIES]; > + u64 *pgd_entries[IVPU_MMU_PGTABLE_ENTRIES]; > + u64 *pgd; > + dma_addr_t pgd_dma; > +}; > + > +struct ivpu_mmu_context { > + struct mutex lock; /* protects: mm, pgtable, bo_list */ > + struct drm_mm mm; > + struct ivpu_mmu_pgtable pgtable; > + struct list_head bo_list; > + u32 id; > +}; > + > +int ivpu_mmu_global_context_init(struct ivpu_device *vdev); > +void ivpu_mmu_global_context_fini(struct ivpu_device *vdev); > + > +int ivpu_mmu_user_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 ctx_id); > +void ivpu_mmu_user_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx); > + > +int ivpu_mmu_context_insert_node_locked(struct ivpu_mmu_context *ctx, > + const struct ivpu_addr_range *range, > + u64 size, struct drm_mm_node *node); > +void ivpu_mmu_context_remove_node_locked(struct ivpu_mmu_context *ctx, > + struct drm_mm_node *node); > + > +int ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, > + u64 vpu_addr, struct sg_table *sgt, bool llc_coherent); > +void ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, > + u64 vpu_addr, struct sg_table *sgt); > + > +#endif /* __IVPU_MMU_CONTEXT_H__ */ > diff --git a/include/uapi/drm/ivpu_drm.h b/include/uapi/drm/ivpu_drm.h > index 922cbf30ce34..fc97ce215e79 100644 > --- a/include/uapi/drm/ivpu_drm.h > +++ b/include/uapi/drm/ivpu_drm.h > @@ -38,6 +38,7 @@ extern "C" { > #define DRM_IVPU_PARAM_NUM_CONTEXTS 4 > #define DRM_IVPU_PARAM_CONTEXT_BASE_ADDRESS 5 > #define DRM_IVPU_PARAM_CONTEXT_PRIORITY 6 > +#define DRM_IVPU_PARAM_CONTEXT_ID 7 > > #define DRM_IVPU_PLATFORM_TYPE_SILICON 0 > > @@ -78,6 +79,9 @@ struct drm_ivpu_param { > * Value of current context scheduling priority (read-write). > * See DRM_IVPU_CONTEXT_PRIORITY_* for possible values. > * > + * %DRM_IVPU_PARAM_CONTEXT_ID: > + * Current context ID, always greater than 0 (read-only) > + * > */ > __u32 param; > > -- > 2.34.1 >
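One more note on DRM_IVPU_PARAM_CONTEXT_ID, tying back to my question above: if it stays, please document the intended userspace flow. I assume it is queried like any other param, i.e. something like this (userspace sketch; I'm assuming DRM_IOCTL_IVPU_GET_PARAM from the first patch of this series and a __u64 value output field in struct drm_ivpu_param):

        struct drm_ivpu_param args = { .param = DRM_IVPU_PARAM_CONTEXT_ID };

        if (ioctl(fd, DRM_IOCTL_IVPU_GET_PARAM, &args) == 0)
                printf("context id: %llu\n", (unsigned long long)args.value);

but that still doesn't tell me what userspace actually does with the ID.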