Add a GEM implementation based on drm_gem_shmem, and support code for the PowerVR GPU MMU. The GPU VA manager is used for address space management. Changes since v6: - Don't initialise kernel_vm_ctx when using MIPS firmware processor - Rename drm_gpuva_manager uses to drm_gpuvm - Sync GEM object to device on creation Changes since v5: - Use WRITE_ONCE() when writing to page tables - Add memory barriers to page table insertion - Fixed double backing page alloc on page table objects - Fix BO mask checks in DRM_IOCTL_PVR_CREATE_BO handler - Document use of pvr_page_table_*_idx when preallocing page table objs - Remove pvr_vm_gpuva_mapping_init() - Remove NULL check for unmap op in remap function - Protect gem object with mutex during drm_gpuva_link/unlink - Defer free or release of page table pages until after TLB flush - Use drm_gpuva_op_remap_get_unmap_range() helper Changes since v4: - Correct sync function in vmap/vunmap function documentation - Update for upstream GPU VA manager - Fix missing frees when unmapping drm_gpuva objects - Always zero GEM BOs on creation Changes since v3: - Split MMU and VM code - Register page table allocations with kmemleak - Use drm_dev_{enter,exit} Changes since v2: - Use GPU VA manager - Use drm_gem_shmem Co-developed-by: Matt Coster <matt.coster@xxxxxxxxxx> Signed-off-by: Matt Coster <matt.coster@xxxxxxxxxx> Co-developed-by: Donald Robson <donald.robson@xxxxxxxxxx> Signed-off-by: Donald Robson <donald.robson@xxxxxxxxxx> Signed-off-by: Sarah Walker <sarah.walker@xxxxxxxxxx> --- drivers/gpu/drm/imagination/Kconfig | 1 + drivers/gpu/drm/imagination/Makefile | 5 +- drivers/gpu/drm/imagination/pvr_device.c | 27 +- drivers/gpu/drm/imagination/pvr_device.h | 18 + drivers/gpu/drm/imagination/pvr_drv.c | 301 ++- drivers/gpu/drm/imagination/pvr_gem.c | 421 ++++ drivers/gpu/drm/imagination/pvr_gem.h | 184 ++ drivers/gpu/drm/imagination/pvr_mmu.c | 2528 ++++++++++++++++++++++ drivers/gpu/drm/imagination/pvr_mmu.h | 108 + drivers/gpu/drm/imagination/pvr_vm.c | 947 ++++++++ drivers/gpu/drm/imagination/pvr_vm.h | 60 + 11 files changed, 4589 insertions(+), 11 deletions(-) create mode 100644 drivers/gpu/drm/imagination/pvr_gem.c create mode 100644 drivers/gpu/drm/imagination/pvr_gem.h create mode 100644 drivers/gpu/drm/imagination/pvr_mmu.c create mode 100644 drivers/gpu/drm/imagination/pvr_mmu.h create mode 100644 drivers/gpu/drm/imagination/pvr_vm.c create mode 100644 drivers/gpu/drm/imagination/pvr_vm.h diff --git a/drivers/gpu/drm/imagination/Kconfig b/drivers/gpu/drm/imagination/Kconfig index e9aaa5313485..3e167d5470b4 100644 --- a/drivers/gpu/drm/imagination/Kconfig +++ b/drivers/gpu/drm/imagination/Kconfig @@ -7,6 +7,7 @@ config DRM_POWERVR depends on DRM select DRM_GEM_SHMEM_HELPER select DRM_SCHED + select DRM_GPUVM select FW_LOADER help Choose this option if you have a system that has an Imagination diff --git a/drivers/gpu/drm/imagination/Makefile b/drivers/gpu/drm/imagination/Makefile index 9e144ff2742b..8fcabc1bea36 100644 --- a/drivers/gpu/drm/imagination/Makefile +++ b/drivers/gpu/drm/imagination/Makefile @@ -7,6 +7,9 @@ powervr-y := \ pvr_device.o \ pvr_device_info.o \ pvr_drv.o \ - pvr_fw.o + pvr_fw.o \ + pvr_gem.o \ + pvr_mmu.o \ + pvr_vm.o obj-$(CONFIG_DRM_POWERVR) += powervr.o diff --git a/drivers/gpu/drm/imagination/pvr_device.c b/drivers/gpu/drm/imagination/pvr_device.c index b1fae182c4f6..f71e400ea24e 100644 --- a/drivers/gpu/drm/imagination/pvr_device.c +++ b/drivers/gpu/drm/imagination/pvr_device.c @@ -6,6 +6,7 @@ #include "pvr_fw.h" #include "pvr_rogue_cr_defs.h" +#include "pvr_vm.h" #include <drm/drm_print.h> @@ -312,7 +313,30 @@ pvr_device_gpu_init(struct pvr_device *pvr_dev) else return -EINVAL; - return pvr_set_dma_info(pvr_dev); + err = pvr_set_dma_info(pvr_dev); + if (err) + return err; + + if (pvr_dev->fw_dev.processor_type != PVR_FW_PROCESSOR_TYPE_MIPS) { + pvr_dev->kernel_vm_ctx = pvr_vm_create_context(pvr_dev, false); + if (IS_ERR(pvr_dev->kernel_vm_ctx)) + return PTR_ERR(pvr_dev->kernel_vm_ctx); + } + + return 0; +} + +/** + * pvr_device_gpu_fini() - GPU-specific deinitialization for a PowerVR device + * @pvr_dev: Target PowerVR device. + */ +static void +pvr_device_gpu_fini(struct pvr_device *pvr_dev) +{ + if (pvr_dev->fw_dev.processor_type != PVR_FW_PROCESSOR_TYPE_MIPS) { + WARN_ON(!pvr_vm_context_put(pvr_dev->kernel_vm_ctx)); + pvr_dev->kernel_vm_ctx = NULL; + } } /** @@ -364,6 +388,7 @@ pvr_device_fini(struct pvr_device *pvr_dev) * Deinitialization stages are performed in reverse order compared to * the initialization stages in pvr_device_init(). */ + pvr_device_gpu_fini(pvr_dev); } bool diff --git a/drivers/gpu/drm/imagination/pvr_device.h b/drivers/gpu/drm/imagination/pvr_device.h index 833fd686c8eb..350e894a2939 100644 --- a/drivers/gpu/drm/imagination/pvr_device.h +++ b/drivers/gpu/drm/imagination/pvr_device.h @@ -123,6 +123,16 @@ struct pvr_device { */ struct clk *mem_clk; + /** + * @kernel_vm_ctx: Virtual memory context used for kernel mappings. + * + * This is used for mappings in the firmware address region when a META firmware processor + * is in use. + * + * When a MIPS firmware processor is in use, this will be %NULL. + */ + struct pvr_vm_context *kernel_vm_ctx; + /** @fw_dev: Firmware related data. */ struct pvr_fw_device fw_dev; }; @@ -145,6 +155,14 @@ struct pvr_file { * to_pvr_device(). */ struct pvr_device *pvr_dev; + + /** + * @vm_ctx_handles: Array of VM contexts belonging to this file. Array + * members are of type "struct pvr_vm_context *". + * + * This array is used to allocate handles returned to userspace. + */ + struct xarray vm_ctx_handles; }; /** diff --git a/drivers/gpu/drm/imagination/pvr_drv.c b/drivers/gpu/drm/imagination/pvr_drv.c index ecdef9720eea..4b0c52118735 100644 --- a/drivers/gpu/drm/imagination/pvr_drv.c +++ b/drivers/gpu/drm/imagination/pvr_drv.c @@ -3,9 +3,11 @@ #include "pvr_device.h" #include "pvr_drv.h" +#include "pvr_gem.h" #include "pvr_rogue_defs.h" #include "pvr_rogue_fwif_client.h" #include "pvr_rogue_fwif_shared.h" +#include "pvr_vm.h" #include <uapi/drm/pvr_drm.h> @@ -60,7 +62,85 @@ static int pvr_ioctl_create_bo(struct drm_device *drm_dev, void *raw_args, struct drm_file *file) { - return -ENOTTY; + struct drm_pvr_ioctl_create_bo_args *args = raw_args; + struct pvr_device *pvr_dev = to_pvr_device(drm_dev); + struct pvr_file *pvr_file = to_pvr_file(file); + + struct pvr_gem_object *pvr_obj; + size_t sanitized_size; + size_t real_size; + + int idx; + int err; + + if (!drm_dev_enter(drm_dev, &idx)) + return -EIO; + + /* All padding fields must be zeroed. */ + if (args->_padding_c != 0) { + err = -EINVAL; + goto err_drm_dev_exit; + } + + /* + * On 64-bit platforms (our primary target), size_t is a u64. However, + * on other architectures we have to check for overflow when casting + * down to size_t from u64. + * + * We also disallow zero-sized allocations, and reserved (kernel-only) + * flags. + */ + if (args->size > SIZE_MAX || args->size == 0 || args->flags & ~DRM_PVR_BO_FLAGS_MASK) { + err = -EINVAL; + goto err_drm_dev_exit; + } + + sanitized_size = (size_t)args->size; + + /* + * Create a buffer object and transfer ownership to a userspace- + * accessible handle. + */ + pvr_obj = pvr_gem_object_create(pvr_dev, sanitized_size, args->flags); + if (IS_ERR(pvr_obj)) { + err = PTR_ERR(pvr_obj); + goto err_drm_dev_exit; + } + + /* + * Store the actual size of the created buffer object. We can't fetch + * this after this point because we will no longer have a reference to + * &pvr_obj. + */ + real_size = pvr_gem_object_size(pvr_obj); + + /* This function will not modify &args->handle unless it succeeds. */ + err = pvr_gem_object_into_handle(pvr_obj, pvr_file, &args->handle); + if (err) + goto err_destroy_obj; + + /* + * Now write the real size back to the args struct, after no further + * errors can occur. + */ + args->size = real_size; + + drm_dev_exit(idx); + + return 0; + +err_destroy_obj: + /* + * GEM objects are refcounted, so there is no explicit destructor + * function. Instead, we release the singular reference we currently + * hold on the object and let GEM take care of the rest. + */ + pvr_gem_object_put(pvr_obj); + +err_drm_dev_exit: + drm_dev_exit(idx); + + return err; } /** @@ -87,7 +167,61 @@ static int pvr_ioctl_get_bo_mmap_offset(struct drm_device *drm_dev, void *raw_args, struct drm_file *file) { - return -ENOTTY; + struct drm_pvr_ioctl_get_bo_mmap_offset_args *args = raw_args; + struct pvr_file *pvr_file = to_pvr_file(file); + struct pvr_gem_object *pvr_obj; + struct drm_gem_object *gem_obj; + int idx; + int ret; + + if (!drm_dev_enter(drm_dev, &idx)) + return -EIO; + + /* All padding fields must be zeroed. */ + if (args->_padding_4 != 0) { + ret = -EINVAL; + goto err_drm_dev_exit; + } + + /* + * Obtain a kernel reference to the buffer object. This reference is + * counted and must be manually dropped before returning. If a buffer + * object cannot be found for the specified handle, return -%ENOENT (No + * such file or directory). + */ + pvr_obj = pvr_gem_object_from_handle(pvr_file, args->handle); + if (!pvr_obj) { + ret = -ENOENT; + goto err_drm_dev_exit; + } + + gem_obj = gem_from_pvr_gem(pvr_obj); + + /* + * Allocate a fake offset which can be used in userspace calls to mmap + * on the DRM device file. If this fails, return the error code. This + * operation is idempotent. + */ + ret = drm_gem_create_mmap_offset(gem_obj); + if (ret != 0) { + /* Drop our reference to the buffer object. */ + drm_gem_object_put(gem_obj); + goto err_drm_dev_exit; + } + + /* + * Read out the fake offset allocated by the earlier call to + * drm_gem_create_mmap_offset. + */ + args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node); + + /* Drop our reference to the buffer object. */ + pvr_gem_object_put(pvr_obj); + +err_drm_dev_exit: + drm_dev_exit(idx); + + return ret; } static __always_inline u64 @@ -516,10 +650,12 @@ pvr_ioctl_dev_query(struct drm_device *drm_dev, void *raw_args, break; case DRM_PVR_DEV_QUERY_HEAP_INFO_GET: - return -EINVAL; + ret = pvr_heap_info_get(pvr_dev, args); + break; case DRM_PVR_DEV_QUERY_STATIC_DATA_AREAS_GET: - return -EINVAL; + ret = pvr_static_data_areas_get(pvr_dev, args); + break; } drm_dev_exit(idx); @@ -666,7 +802,46 @@ static int pvr_ioctl_create_vm_context(struct drm_device *drm_dev, void *raw_args, struct drm_file *file) { - return -ENOTTY; + struct drm_pvr_ioctl_create_vm_context_args *args = raw_args; + struct pvr_file *pvr_file = to_pvr_file(file); + struct pvr_vm_context *vm_ctx; + int idx; + int err; + + if (!drm_dev_enter(drm_dev, &idx)) + return -EIO; + + if (args->_padding_4) { + err = -EINVAL; + goto err_drm_dev_exit; + } + + vm_ctx = pvr_vm_create_context(pvr_file->pvr_dev, true); + if (IS_ERR(vm_ctx)) { + err = PTR_ERR(vm_ctx); + goto err_drm_dev_exit; + } + + /* Allocate object handle for userspace. */ + err = xa_alloc(&pvr_file->vm_ctx_handles, + &args->handle, + vm_ctx, + xa_limit_32b, + GFP_KERNEL); + if (err < 0) + goto err_cleanup; + + drm_dev_exit(idx); + + return 0; + +err_cleanup: + pvr_vm_context_put(vm_ctx); + +err_drm_dev_exit: + drm_dev_exit(idx); + + return err; } /** @@ -686,7 +861,19 @@ static int pvr_ioctl_destroy_vm_context(struct drm_device *drm_dev, void *raw_args, struct drm_file *file) { - return -ENOTTY; + struct drm_pvr_ioctl_destroy_vm_context_args *args = raw_args; + struct pvr_file *pvr_file = to_pvr_file(file); + struct pvr_vm_context *vm_ctx; + + if (args->_padding_4) + return -EINVAL; + + vm_ctx = xa_erase(&pvr_file->vm_ctx_handles, args->handle); + if (!vm_ctx) + return -EINVAL; + + pvr_vm_context_put(vm_ctx); + return 0; } /** @@ -716,7 +903,79 @@ static int pvr_ioctl_vm_map(struct drm_device *drm_dev, void *raw_args, struct drm_file *file) { - return -ENOTTY; + struct pvr_device *pvr_dev = to_pvr_device(drm_dev); + struct drm_pvr_ioctl_vm_map_args *args = raw_args; + struct pvr_file *pvr_file = to_pvr_file(file); + struct pvr_vm_context *vm_ctx; + + struct pvr_gem_object *pvr_obj; + size_t pvr_obj_size; + + u64 offset_plus_size; + int idx; + int err; + + if (!drm_dev_enter(drm_dev, &idx)) + return -EIO; + + /* Initial validation of args. */ + if (args->_padding_14) { + err = -EINVAL; + goto err_drm_dev_exit; + } + + if (args->flags != 0 || + check_add_overflow(args->offset, args->size, &offset_plus_size) || + !pvr_find_heap_containing(pvr_dev, args->device_addr, args->size)) { + err = -EINVAL; + goto err_drm_dev_exit; + } + + vm_ctx = pvr_vm_context_lookup(pvr_file, args->vm_context_handle); + if (!vm_ctx) { + err = -EINVAL; + goto err_drm_dev_exit; + } + + pvr_obj = pvr_gem_object_from_handle(pvr_file, args->handle); + if (!pvr_obj) { + err = -ENOENT; + goto err_put_vm_context; + } + + pvr_obj_size = pvr_gem_object_size(pvr_obj); + + /* + * Validate offset and size args. The alignment of these will be + * checked when mapping; for now just check that they're within valid + * bounds + */ + if (args->offset >= pvr_obj_size || offset_plus_size > pvr_obj_size) { + err = -EINVAL; + goto err_put_pvr_object; + } + + err = pvr_vm_map(vm_ctx, pvr_obj, args->offset, + args->device_addr, args->size); + if (err) + goto err_put_pvr_object; + + /* + * In order to set up the mapping, we needed a reference to &pvr_obj. + * However, pvr_vm_map() obtains and stores its own reference, so we + * must release ours before returning. + */ + +err_put_pvr_object: + pvr_gem_object_put(pvr_obj); + +err_put_vm_context: + pvr_vm_context_put(vm_ctx); + +err_drm_dev_exit: + drm_dev_exit(idx); + + return err; } /** @@ -739,7 +998,24 @@ static int pvr_ioctl_vm_unmap(struct drm_device *drm_dev, void *raw_args, struct drm_file *file) { - return -ENOTTY; + struct drm_pvr_ioctl_vm_unmap_args *args = raw_args; + struct pvr_file *pvr_file = to_pvr_file(file); + struct pvr_vm_context *vm_ctx; + int err; + + /* Initial validation of args. */ + if (args->_padding_4) + return -EINVAL; + + vm_ctx = pvr_vm_context_lookup(pvr_file, args->vm_context_handle); + if (!vm_ctx) + return -EINVAL; + + err = pvr_vm_unmap(vm_ctx, args->device_addr, args->size); + + pvr_vm_context_put(vm_ctx); + + return err; } /* @@ -930,6 +1206,8 @@ pvr_drm_driver_open(struct drm_device *drm_dev, struct drm_file *file) */ pvr_file->pvr_dev = pvr_dev; + xa_init_flags(&pvr_file->vm_ctx_handles, XA_FLAGS_ALLOC1); + /* * Store reference to powervr-specific file private data in DRM file * private data. @@ -955,6 +1233,9 @@ pvr_drm_driver_postclose(__always_unused struct drm_device *drm_dev, { struct pvr_file *pvr_file = to_pvr_file(file); + /* Drop references on any remaining objects. */ + pvr_destroy_vm_contexts_for_file(pvr_file); + kfree(pvr_file); file->driver_priv = NULL; } @@ -962,7 +1243,7 @@ pvr_drm_driver_postclose(__always_unused struct drm_device *drm_dev, DEFINE_DRM_GEM_FOPS(pvr_drm_driver_fops); static struct drm_driver pvr_drm_driver = { - .driver_features = DRIVER_RENDER, + .driver_features = DRIVER_GEM | DRIVER_GEM_GPUVA | DRIVER_RENDER, .open = pvr_drm_driver_open, .postclose = pvr_drm_driver_postclose, .ioctls = pvr_drm_driver_ioctls, @@ -976,6 +1257,8 @@ static struct drm_driver pvr_drm_driver = { .minor = PVR_DRIVER_MINOR, .patchlevel = PVR_DRIVER_PATCHLEVEL, + .gem_prime_import_sg_table = drm_gem_shmem_prime_import_sg_table, + .gem_create_object = pvr_gem_create_object, }; static int diff --git a/drivers/gpu/drm/imagination/pvr_gem.c b/drivers/gpu/drm/imagination/pvr_gem.c new file mode 100644 index 000000000000..51d7a6fe3e58 --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_gem.c @@ -0,0 +1,421 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#include "pvr_device.h" +#include "pvr_gem.h" +#include "pvr_vm.h" + +#include <drm/drm_gem.h> +#include <drm/drm_prime.h> + +#include <linux/compiler.h> +#include <linux/compiler_attributes.h> +#include <linux/dma-buf.h> +#include <linux/dma-direction.h> +#include <linux/dma-mapping.h> +#include <linux/err.h> +#include <linux/gfp.h> +#include <linux/iosys-map.h> +#include <linux/log2.h> +#include <linux/mutex.h> +#include <linux/pagemap.h> +#include <linux/refcount.h> +#include <linux/scatterlist.h> + +static void pvr_gem_object_free(struct drm_gem_object *obj) +{ + struct pvr_gem_object *pvr_gem = gem_to_pvr_gem(obj); + + mutex_destroy(&pvr_gem->gpuva_lock); + drm_gem_shmem_object_free(obj); +} + +static int pvr_gem_mmap(struct drm_gem_object *gem_obj, struct vm_area_struct *vma) +{ + struct pvr_gem_object *pvr_obj = gem_to_pvr_gem(gem_obj); + struct drm_gem_shmem_object *shmem_obj = shmem_gem_from_pvr_gem(pvr_obj); + + if (!(pvr_obj->flags & DRM_PVR_BO_CPU_ALLOW_USERSPACE_ACCESS)) + return -EINVAL; + + return drm_gem_shmem_mmap(shmem_obj, vma); +} + +static const struct drm_gem_object_funcs pvr_gem_object_funcs = { + .free = pvr_gem_object_free, + .print_info = drm_gem_shmem_object_print_info, + .pin = drm_gem_shmem_object_pin, + .unpin = drm_gem_shmem_object_unpin, + .get_sg_table = drm_gem_shmem_object_get_sg_table, + .vmap = drm_gem_shmem_object_vmap, + .vunmap = drm_gem_shmem_object_vunmap, + .mmap = pvr_gem_mmap, + .vm_ops = &drm_gem_shmem_vm_ops, +}; + +/** + * pvr_gem_object_flags_validate() - Verify that a collection of PowerVR GEM + * mapping and/or creation flags form a valid combination. + * @flags: PowerVR GEM mapping/creation flags to validate. + * + * This function explicitly allows kernel-only flags. All ioctl entrypoints + * should do their own validation as well as relying on this function. + * + * Return: + * * %true if @flags contains valid mapping and/or creation flags, or + * * %false otherwise. + */ +static bool +pvr_gem_object_flags_validate(u64 flags) +{ + static const u64 invalid_combinations[] = { + /* + * Memory flagged as PM/FW-protected cannot be mapped to + * userspace. To make this explicit, we require that the two + * flags allowing each of these respective features are never + * specified together. + */ + (DRM_PVR_BO_DEVICE_PM_FW_PROTECT | + DRM_PVR_BO_CPU_ALLOW_USERSPACE_ACCESS), + }; + + int i; + + /* + * Check for bits set in undefined regions. Reserved regions refer to + * options that can only be set by the kernel. These are explicitly + * allowed in most cases, and must be checked specifically in IOCTL + * callback code. + */ + if ((flags & PVR_BO_UNDEFINED_MASK) != 0) + return false; + + /* + * Check for all combinations of flags marked as invalid in the array + * above. + */ + for (i = 0; i < ARRAY_SIZE(invalid_combinations); ++i) { + u64 combo = invalid_combinations[i]; + + if ((flags & combo) == combo) + return false; + } + + return true; +} + +/** + * pvr_gem_object_into_handle() - Convert a reference to an object into a + * userspace-accessible handle. + * @pvr_obj: [IN] Target PowerVR-specific object. + * @pvr_file: [IN] File to associate the handle with. + * @handle: [OUT] Pointer to store the created handle in. Remains unmodified if + * an error is encountered. + * + * If an error is encountered, ownership of @pvr_obj will not have been + * transferred. If this function succeeds, however, further use of @pvr_obj is + * considered undefined behaviour unless another reference to it is explicitly + * held. + * + * Return: + * * 0 on success, or + * * Any error encountered while attempting to allocate a handle on @pvr_file. + */ +int +pvr_gem_object_into_handle(struct pvr_gem_object *pvr_obj, + struct pvr_file *pvr_file, u32 *handle) +{ + struct drm_gem_object *gem_obj = gem_from_pvr_gem(pvr_obj); + struct drm_file *file = from_pvr_file(pvr_file); + + u32 new_handle; + int err; + + err = drm_gem_handle_create(file, gem_obj, &new_handle); + if (err) + return err; + + /* + * Release our reference to @pvr_obj, effectively transferring + * ownership to the handle. + */ + pvr_gem_object_put(pvr_obj); + + /* + * Do not store the new handle in @handle until no more errors can + * occur. + */ + *handle = new_handle; + + return 0; +} + +/** + * pvr_gem_object_from_handle() - Obtain a reference to an object from a + * userspace handle. + * @pvr_file: PowerVR-specific file to which @handle is associated. + * @handle: Userspace handle referencing the target object. + * + * On return, @handle always maintains its reference to the requested object + * (if it had one in the first place). If this function succeeds, the returned + * object will hold an additional reference. When the caller is finished with + * the returned object, they should call pvr_gem_object_put() on it to release + * this reference. + * + * Return: + * * A pointer to the requested PowerVR-specific object on success, or + * * %NULL otherwise. + */ +struct pvr_gem_object * +pvr_gem_object_from_handle(struct pvr_file *pvr_file, u32 handle) +{ + struct drm_file *file = from_pvr_file(pvr_file); + struct drm_gem_object *gem_obj; + + gem_obj = drm_gem_object_lookup(file, handle); + if (!gem_obj) + return NULL; + + return gem_to_pvr_gem(gem_obj); +} + +/** + * pvr_gem_object_vmap() - Map a PowerVR GEM object into CPU virtual address + * space. + * @pvr_obj: Target PowerVR GEM object. + * + * Once the caller is finished with the CPU mapping, they must call + * pvr_gem_object_vunmap() on @pvr_obj. + * + * If @pvr_obj is CPU-cached, dma_sync_sgtable_for_cpu() is called to make + * sure the CPU mapping is consistent. + * + * Return: + * * A pointer to the CPU mapping on success, + * * -%ENOMEM if the mapping fails, or + * * Any error encountered while attempting to acquire a reference to the + * backing pages for @pvr_obj. + */ +void * +pvr_gem_object_vmap(struct pvr_gem_object *pvr_obj) +{ + struct drm_gem_shmem_object *shmem_obj = shmem_gem_from_pvr_gem(pvr_obj); + struct drm_gem_object *obj = gem_from_pvr_gem(pvr_obj); + struct iosys_map map; + int err; + + dma_resv_lock(obj->resv, NULL); + + err = drm_gem_shmem_vmap(shmem_obj, &map); + if (err) + goto err_unlock; + + if (pvr_obj->flags & PVR_BO_CPU_CACHED) { + struct device *dev = shmem_obj->base.dev->dev; + + /* If shmem_obj->sgt is NULL, that means the buffer hasn't been mapped + * in GPU space yet. + */ + if (shmem_obj->sgt) + dma_sync_sgtable_for_cpu(dev, shmem_obj->sgt, DMA_BIDIRECTIONAL); + } + + dma_resv_unlock(obj->resv); + + return map.vaddr; + +err_unlock: + dma_resv_unlock(obj->resv); + + return ERR_PTR(err); +} + +/** + * pvr_gem_object_vunmap() - Unmap a PowerVR memory object from CPU virtual + * address space. + * @pvr_obj: Target PowerVR GEM object. + * + * If @pvr_obj is CPU-cached, dma_sync_sgtable_for_device() is called to make + * sure the GPU mapping is consistent. + */ +void +pvr_gem_object_vunmap(struct pvr_gem_object *pvr_obj) +{ + struct drm_gem_shmem_object *shmem_obj = shmem_gem_from_pvr_gem(pvr_obj); + struct iosys_map map = IOSYS_MAP_INIT_VADDR(shmem_obj->vaddr); + struct drm_gem_object *obj = gem_from_pvr_gem(pvr_obj); + + if (WARN_ON(!map.vaddr)) + return; + + dma_resv_lock(obj->resv, NULL); + + if (pvr_obj->flags & PVR_BO_CPU_CACHED) { + struct device *dev = shmem_obj->base.dev->dev; + + /* If shmem_obj->sgt is NULL, that means the buffer hasn't been mapped + * in GPU space yet. + */ + if (shmem_obj->sgt) + dma_sync_sgtable_for_device(dev, shmem_obj->sgt, DMA_BIDIRECTIONAL); + } + + drm_gem_shmem_vunmap(shmem_obj, &map); + + dma_resv_unlock(obj->resv); +} + +/** + * pvr_gem_object_zero() - Zeroes the physical memory behind an object. + * @pvr_obj: Target PowerVR GEM object. + * + * Return: + * * 0 on success, or + * * Any error encountered while attempting to map @pvr_obj to the CPU (see + * pvr_gem_object_vmap()). + */ +static int +pvr_gem_object_zero(struct pvr_gem_object *pvr_obj) +{ + void *cpu_ptr; + + cpu_ptr = pvr_gem_object_vmap(pvr_obj); + if (IS_ERR(cpu_ptr)) + return PTR_ERR(cpu_ptr); + + memset(cpu_ptr, 0, pvr_gem_object_size(pvr_obj)); + + /* Make sure the zero-ing is done before vumap-ing the object. */ + wmb(); + + pvr_gem_object_vunmap(pvr_obj); + + return 0; +} + +/** + * pvr_gem_create_object() - Allocate and pre-initializes a pvr_gem_object + * @drm_dev: DRM device creating this object. + * @size: Size of the object to allocate in bytes. + * + * Return: + * * The new pre-initialized GEM object on success, + * * -ENOMEM if the allocation failed. + */ +struct drm_gem_object *pvr_gem_create_object(struct drm_device *drm_dev, size_t size) +{ + struct drm_gem_object *gem_obj; + struct pvr_gem_object *pvr_obj; + + pvr_obj = kzalloc(sizeof(*pvr_obj), GFP_KERNEL); + if (!pvr_obj) + return ERR_PTR(-ENOMEM); + + gem_obj = gem_from_pvr_gem(pvr_obj); + gem_obj->funcs = &pvr_gem_object_funcs; + + drm_gem_gpuva_set_lock(gem_obj, &pvr_obj->gpuva_lock); + + return gem_obj; +} + +/** + * pvr_gem_object_create() - Creates a PowerVR-specific buffer object. + * @pvr_dev: Target PowerVR device. + * @size: Size of the object to allocate in bytes. Must be greater than zero. + * Any value which is not an exact multiple of the system page size will be + * rounded up to satisfy this condition. + * @flags: Options which affect both this operation and future mapping + * operations performed on the returned object. Must be a combination of + * DRM_PVR_BO_* and/or PVR_BO_* flags. + * + * The created object may be larger than @size, but can never be smaller. To + * get the exact size, call pvr_gem_object_size() on the returned pointer. + * + * Return: + * * The newly-minted PowerVR-specific buffer object on success, + * * -%EINVAL if @size is zero or @flags is not valid, + * * -%ENOMEM if sufficient physical memory cannot be allocated, or + * * Any other error returned by drm_gem_create_mmap_offset(). + */ +struct pvr_gem_object * +pvr_gem_object_create(struct pvr_device *pvr_dev, size_t size, u64 flags) +{ + struct drm_gem_shmem_object *shmem_obj; + struct pvr_gem_object *pvr_obj; + struct sg_table *sgt; + int err; + + /* Verify @size and @flags before continuing. */ + if (size == 0 || !pvr_gem_object_flags_validate(flags)) + return ERR_PTR(-EINVAL); + + shmem_obj = drm_gem_shmem_create(from_pvr_device(pvr_dev), size); + if (IS_ERR(shmem_obj)) + return ERR_CAST(shmem_obj); + + shmem_obj->pages_mark_dirty_on_put = true; + shmem_obj->map_wc = !(flags & PVR_BO_CPU_CACHED); + pvr_obj = shmem_gem_to_pvr_gem(shmem_obj); + pvr_obj->flags = flags; + + sgt = drm_gem_shmem_get_pages_sgt(shmem_obj); + if (IS_ERR(sgt)) { + err = PTR_ERR(sgt); + goto err_shmem_object_free; + } + + mutex_init(&pvr_obj->gpuva_lock); + + dma_sync_sgtable_for_device(shmem_obj->base.dev->dev, sgt, + DMA_BIDIRECTIONAL); + + /* + * Do this last because pvr_gem_object_zero() requires a fully + * configured instance of struct pvr_gem_object. + */ + pvr_gem_object_zero(pvr_obj); + + return pvr_obj; + +err_shmem_object_free: + drm_gem_shmem_free(shmem_obj); + + return ERR_PTR(err); +} + +/** + * pvr_gem_get_dma_addr() - Get DMA address for given offset in object + * @pvr_obj: Pointer to object to lookup address in. + * @offset: Offset within object to lookup address at. + * @dma_addr_out: Pointer to location to store DMA address. + * + * Returns: + * * 0 on success, or + * * -%EINVAL if object is not currently backed, or if @offset is out of valid + * range for this object. + */ +int +pvr_gem_get_dma_addr(struct pvr_gem_object *pvr_obj, u32 offset, + dma_addr_t *dma_addr_out) +{ + struct drm_gem_shmem_object *shmem_obj = shmem_gem_from_pvr_gem(pvr_obj); + u32 accumulated_offset = 0; + struct scatterlist *sgl; + unsigned int sgt_idx; + + WARN_ON(!shmem_obj->sgt); + for_each_sgtable_dma_sg(shmem_obj->sgt, sgl, sgt_idx) { + u32 new_offset = accumulated_offset + sg_dma_len(sgl); + + if (offset >= accumulated_offset && offset < new_offset) { + *dma_addr_out = sg_dma_address(sgl) + + (offset - accumulated_offset); + return 0; + } + + accumulated_offset = new_offset; + } + + return -EINVAL; +} diff --git a/drivers/gpu/drm/imagination/pvr_gem.h b/drivers/gpu/drm/imagination/pvr_gem.h new file mode 100644 index 000000000000..3cd92aa72929 --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_gem.h @@ -0,0 +1,184 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#ifndef PVR_GEM_H +#define PVR_GEM_H + +#include "pvr_rogue_heap_config.h" +#include "pvr_rogue_meta.h" + +#include <uapi/drm/pvr_drm.h> + +#include <drm/drm_gem.h> +#include <drm/drm_gem_shmem_helper.h> +#include <drm/drm_mm.h> + +#include <linux/bitfield.h> +#include <linux/bits.h> +#include <linux/const.h> +#include <linux/compiler_attributes.h> +#include <linux/kernel.h> +#include <linux/mutex.h> +#include <linux/refcount.h> +#include <linux/scatterlist.h> +#include <linux/sizes.h> +#include <linux/types.h> + +/* Forward declaration from "pvr_device.h". */ +struct pvr_device; +struct pvr_file; + +/** + * DOC: Flags for DRM_IOCTL_PVR_CREATE_BO (kernel-only) + * + * Kernel-only values allowed in &pvr_gem_object->flags. The majority of options + * for this field are specified in the UAPI header "pvr_drm.h" with a + * DRM_PVR_BO_ prefix. To distinguish these internal options (which must exist + * in ranges marked as "reserved" in the UAPI header), we drop the DRM prefix. + * The public options should be used directly, DRM prefix and all. + * + * To avoid potentially confusing gaps in the UAPI options, these kernel-only + * options are specified "in reverse", starting at bit 63. + * + * We use "reserved" to refer to bits defined here and not exposed in the UAPI. + * Bits not defined anywhere are "undefined". + * + * Creation options + * These use the prefix PVR_BO_CREATE_. + * + * *There are currently no kernel-only flags in this group.* + * + * Device mapping options + * These use the prefix PVR_BO_DEVICE_. + * + * *There are currently no kernel-only flags in this group.* + * + * CPU mapping options + * These use the prefix PVR_BO_CPU_. + * + * :CACHED: By default, all GEM objects are mapped write-combined on the + * CPU. Set this flag to override this behaviour and map the object + * cached. + */ +#define PVR_BO_CPU_CACHED BIT_ULL(63) + +#define PVR_BO_FW_NO_CLEAR_ON_RESET BIT_ULL(62) + +#define PVR_BO_KERNEL_FLAGS_MASK (PVR_BO_CPU_CACHED | PVR_BO_FW_NO_CLEAR_ON_RESET) + +/* Bits 61..3 are undefined. */ +/* Bits 2..0 are defined in the UAPI. */ + +/* Other utilities. */ +#define PVR_BO_UNDEFINED_MASK ~(PVR_BO_KERNEL_FLAGS_MASK | DRM_PVR_BO_FLAGS_MASK) + +/* + * All firmware-mapped memory uses (mostly) the same flags. Specifically, + * firmware-mapped memory should be: + * * Read/write on the device, + * * Read/write on the CPU, and + * * Write-combined on the CPU. + * + * The only variation is in caching on the device. + */ +#define PVR_BO_FW_FLAGS_DEVICE_CACHED (ULL(0)) +#define PVR_BO_FW_FLAGS_DEVICE_UNCACHED DRM_PVR_BO_DEVICE_BYPASS_CACHE + +/** + * struct pvr_gem_object - powervr-specific wrapper for &struct drm_gem_object + */ +struct pvr_gem_object { + /** + * @base: The underlying &struct drm_gem_shmem_object. + * + * Do not access this member directly, instead call + * shem_gem_from_pvr_gem(). + */ + struct drm_gem_shmem_object base; + + /** + * @flags: Options set at creation-time. Some of these options apply to + * the creation operation itself (which are stored here for reference) + * with the remainder used for mapping options to both the device and + * CPU. These are used every time this object is mapped, but may be + * changed after creation. + * + * Must be a combination of DRM_PVR_BO_* and/or PVR_BO_* flags. + * + * .. note:: + * + * This member is declared const to indicate that none of these + * options may change or be changed throughout the object's + * lifetime. + */ + u64 flags; + + /** + * @gpuva_lock: Lock for preventing concurrent access during memory + * mapping operations. + */ + struct mutex gpuva_lock; +}; + +static_assert(offsetof(struct pvr_gem_object, base) == 0, + "offsetof(struct pvr_gem_object, base) not zero"); + +#define shmem_gem_from_pvr_gem(pvr_obj) (&(pvr_obj)->base) + +#define shmem_gem_to_pvr_gem(shmem_obj) container_of_const(shmem_obj, struct pvr_gem_object, base) + +#define gem_from_pvr_gem(pvr_obj) (&(pvr_obj)->base.base) + +#define gem_to_pvr_gem(gem_obj) container_of_const(gem_obj, struct pvr_gem_object, base.base) + +/* Functions defined in pvr_gem.c */ + +struct drm_gem_object *pvr_gem_create_object(struct drm_device *drm_dev, size_t size); + +struct pvr_gem_object *pvr_gem_object_create(struct pvr_device *pvr_dev, + size_t size, u64 flags); + +int pvr_gem_object_into_handle(struct pvr_gem_object *pvr_obj, + struct pvr_file *pvr_file, u32 *handle); +struct pvr_gem_object *pvr_gem_object_from_handle(struct pvr_file *pvr_file, + u32 handle); + +static __always_inline struct sg_table * +pvr_gem_object_get_pages_sgt(struct pvr_gem_object *pvr_obj) +{ + return drm_gem_shmem_get_pages_sgt(shmem_gem_from_pvr_gem(pvr_obj)); +} + +void *pvr_gem_object_vmap(struct pvr_gem_object *pvr_obj); +void pvr_gem_object_vunmap(struct pvr_gem_object *pvr_obj); + +int pvr_gem_get_dma_addr(struct pvr_gem_object *pvr_obj, u32 offset, + dma_addr_t *dma_addr_out); + +/** + * pvr_gem_object_get() - Acquire reference on pvr_gem_object + * @pvr_obj: Pointer to object to acquire reference on. + */ +static __always_inline void +pvr_gem_object_get(struct pvr_gem_object *pvr_obj) +{ + drm_gem_object_get(gem_from_pvr_gem(pvr_obj)); +} + +/** + * pvr_gem_object_put() - Release reference on pvr_gem_object + * @pvr_obj: Pointer to object to release reference on. + */ +static __always_inline void +pvr_gem_object_put(struct pvr_gem_object *pvr_obj) +{ + drm_gem_object_put(gem_from_pvr_gem(pvr_obj)); +} + +static __always_inline size_t +pvr_gem_object_size(struct pvr_gem_object *pvr_obj) +{ + return gem_from_pvr_gem(pvr_obj)->size; +} + +#endif /* PVR_GEM_H */ diff --git a/drivers/gpu/drm/imagination/pvr_mmu.c b/drivers/gpu/drm/imagination/pvr_mmu.c new file mode 100644 index 000000000000..895e206fe555 --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_mmu.c @@ -0,0 +1,2528 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#include "pvr_mmu.h" + +#include "pvr_device.h" +#include "pvr_fw.h" +#include "pvr_gem.h" +#include "pvr_rogue_fwif.h" +#include "pvr_rogue_mmu_defs.h" + +#include <drm/drm_drv.h> +#include <linux/bitops.h> +#include <linux/dma-mapping.h> +#include <linux/kmemleak.h> +#include <linux/minmax.h> +#include <linux/sizes.h> + +#define PVR_SHIFT_FROM_SIZE(size_) (__builtin_ctzll(size_)) +#define PVR_MASK_FROM_SIZE(size_) (~((size_) - U64_C(1))) + +/* + * The value of the device page size (%PVR_DEVICE_PAGE_SIZE) is currently + * pegged to the host page size (%PAGE_SIZE). This chunk of macro goodness both + * ensures that the selected host page size corresponds to a valid device page + * size and sets up values needed by the MMU code below. + */ +#if (PVR_DEVICE_PAGE_SIZE == SZ_4K) +# define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_4KB +# define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_4KB_RANGE_SHIFT +# define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_4KB_RANGE_CLRMSK +#elif (PVR_DEVICE_PAGE_SIZE == SZ_16K) +# define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_16KB +# define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_16KB_RANGE_SHIFT +# define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_16KB_RANGE_CLRMSK +#elif (PVR_DEVICE_PAGE_SIZE == SZ_64K) +# define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_64KB +# define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_64KB_RANGE_SHIFT +# define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_64KB_RANGE_CLRMSK +#elif (PVR_DEVICE_PAGE_SIZE == SZ_256K) +# define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_256KB +# define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_256KB_RANGE_SHIFT +# define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_256KB_RANGE_CLRMSK +#elif (PVR_DEVICE_PAGE_SIZE == SZ_1M) +# define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_1MB +# define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_1MB_RANGE_SHIFT +# define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_1MB_RANGE_CLRMSK +#elif (PVR_DEVICE_PAGE_SIZE == SZ_2M) +# define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_2MB +# define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_2MB_RANGE_SHIFT +# define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_2MB_RANGE_CLRMSK +#else +# error Unsupported device page size PVR_DEVICE_PAGE_SIZE +#endif + +#define ROGUE_MMUCTRL_ENTRIES_PT_VALUE_X \ + (ROGUE_MMUCTRL_ENTRIES_PT_VALUE >> \ + (PVR_DEVICE_PAGE_SHIFT - PVR_SHIFT_FROM_SIZE(SZ_4K))) + +/** + * pvr_mmu_flush() - Request flush of all MMU caches. + * @pvr_dev: Target PowerVR device. + * + * This function must be called following any possible change to the MMU page + * tables. + * + * Returns: + * * 0 on success, or + * * Any error encountered while submitting the flush command via the KCCB. + */ +int +pvr_mmu_flush(struct pvr_device *pvr_dev) +{ + /* TODO: implement */ + return -ENODEV; +} + +/** + * DOC: PowerVR Virtual Memory Handling + */ +/** + * DOC: PowerVR Virtual Memory Handling (constants) + * + * .. c:macro:: PVR_IDX_INVALID + * + * Default value for a u16-based index. + * + * This value cannot be zero, since zero is a valid index value. + */ +#define PVR_IDX_INVALID ((u16)(-1)) + +/** + * DOC: MMU backing pages + */ +/** + * DOC: MMU backing pages (constants) + * + * .. c:macro:: PVR_MMU_BACKING_PAGE_SIZE + * + * Page size of a PowerVR device's integrated MMU. The CPU page size must be + * at least as large as this value for the current implementation; this is + * checked at compile-time. + */ +#define PVR_MMU_BACKING_PAGE_SIZE SZ_4K +static_assert(PAGE_SIZE >= PVR_MMU_BACKING_PAGE_SIZE); + +/** + * struct pvr_mmu_backing_page - Represents a single page used to back a page + * table of any level. + * @dma_addr: DMA address of this page. + * @host_ptr: CPU address of this page. + * @pvr_dev: The PowerVR device to which this page is associated. **For + * internal use only.** + */ +struct pvr_mmu_backing_page { + dma_addr_t dma_addr; + void *host_ptr; +/* private: internal use only */ + struct page *raw_page; + struct pvr_device *pvr_dev; +}; + +/** + * pvr_mmu_backing_page_init() - Initialize a MMU backing page. + * @page: Target backing page. + * @pvr_dev: Target PowerVR device. + * + * This function performs three distinct operations: + * + * 1. Allocate a single page, + * 2. Map the page to the CPU, and + * 3. Map the page to DMA-space. + * + * It is expected that @page be zeroed (e.g. from kzalloc()) before calling + * this function. + * + * Return: + * * 0 on success, or + * * -%ENOMEM if allocation of the backing page or mapping of the backing + * page to DMA fails. + */ +static int +pvr_mmu_backing_page_init(struct pvr_mmu_backing_page *page, + struct pvr_device *pvr_dev) +{ + struct device *dev = from_pvr_device(pvr_dev)->dev; + + struct page *raw_page; + int err; + + dma_addr_t dma_addr; + void *host_ptr; + + raw_page = alloc_page(__GFP_ZERO | GFP_KERNEL); + if (!raw_page) + return -ENOMEM; + + host_ptr = vmap(&raw_page, 1, VM_MAP, pgprot_writecombine(PAGE_KERNEL)); + if (!host_ptr) { + err = -ENOMEM; + goto err_free_page; + } + + dma_addr = dma_map_page(dev, raw_page, 0, PVR_MMU_BACKING_PAGE_SIZE, + DMA_TO_DEVICE); + if (dma_mapping_error(dev, dma_addr)) { + err = -ENOMEM; + goto err_unmap_page; + } + + page->dma_addr = dma_addr; + page->host_ptr = host_ptr; + page->pvr_dev = pvr_dev; + page->raw_page = raw_page; + kmemleak_alloc(page->host_ptr, PAGE_SIZE, 1, GFP_KERNEL); + + return 0; + +err_unmap_page: + vunmap(host_ptr); + +err_free_page: + __free_page(raw_page); + + return err; +} + +/** + * pvr_mmu_backing_page_fini() - Teardown a MMU backing page. + * @page: Target backing page. + * + * This function performs the mirror operations to pvr_mmu_backing_page_init(), + * in reverse order: + * + * 1. Unmap the page from DMA-space, + * 2. Unmap the page from the CPU, and + * 3. Free the page. + * + * It also zeros @page. + * + * It is a no-op to call this function a second (or further) time on any @page. + */ +static void +pvr_mmu_backing_page_fini(struct pvr_mmu_backing_page *page) +{ + struct device *dev = from_pvr_device(page->pvr_dev)->dev; + + /* Do nothing if no allocation is present. */ + if (!page->pvr_dev) + return; + + dma_unmap_page(dev, page->dma_addr, PVR_MMU_BACKING_PAGE_SIZE, + DMA_TO_DEVICE); + + kmemleak_free(page->host_ptr); + vunmap(page->host_ptr); + + __free_page(page->raw_page); + + memset(page, 0, sizeof(*page)); +} + +/** + * pvr_mmu_backing_page_sync() - Flush a MMU backing page from the CPU to the + * device. + * @page: Target backing page. + * + * .. caution:: + * + * **This is potentially an expensive function call.** Only call + * pvr_mmu_backing_page_sync() once you're sure you have no more changes to + * make to the backing page in the immediate future. + */ +static void +pvr_mmu_backing_page_sync(struct pvr_mmu_backing_page *page) +{ + struct device *dev; + + /* + * Do nothing if no allocation is present. This may be the case if + * we are unmapping pages. + */ + if (!page->pvr_dev) + return; + + dev = from_pvr_device(page->pvr_dev)->dev; + + dma_sync_single_for_device(dev, page->dma_addr, + PVR_MMU_BACKING_PAGE_SIZE, DMA_TO_DEVICE); +} + +/** + * DOC: Raw page tables + */ + +#define PVR_PAGE_TABLE_TYPEOF_ENTRY(level_) \ + typeof_member(struct pvr_page_table_l##level_##_entry_raw, val) + +#define PVR_PAGE_TABLE_FIELD_GET(level_, name_, field_, entry_) \ + (((entry_).val & \ + ~ROGUE_MMUCTRL_##name_##_DATA_##field_##_CLRMSK) >> \ + ROGUE_MMUCTRL_##name_##_DATA_##field_##_SHIFT) + +#define PVR_PAGE_TABLE_FIELD_PREP(level_, name_, field_, val_) \ + ((((PVR_PAGE_TABLE_TYPEOF_ENTRY(level_))(val_)) \ + << ROGUE_MMUCTRL_##name_##_DATA_##field_##_SHIFT) & \ + ~ROGUE_MMUCTRL_##name_##_DATA_##field_##_CLRMSK) + +/** + * struct pvr_page_table_l2_entry_raw - A single entry in a level 2 page table. + * @val: The raw value of this entry. + * + * This type is a structure for type-checking purposes. At compile-time, its + * size is checked against %ROGUE_MMUCTRL_ENTRY_SIZE_PC_VALUE. + * + * The value stored in this structure can be decoded using the following bitmap: + * + * .. flat-table:: + * :widths: 1 5 + * :stub-columns: 1 + * + * * - 31..4 + * - **Level 1 Page Table Base Address:** Bits 39..12 of the L1 + * page table base address, which is 4KiB aligned. + * + * * - 3..2 + * - *(reserved)* + * + * * - 1 + * - **Pending:** When valid bit is not set, indicates that a valid + * entry is pending and the MMU should wait for the driver to map + * the entry. This is used to support page demand mapping of + * memory. + * + * * - 0 + * - **Valid:** Indicates that the entry contains a valid L1 page + * table. If the valid bit is not set, then an attempted use of + * the page would result in a page fault. + */ +struct pvr_page_table_l2_entry_raw { + u32 val; +} __packed; +static_assert(sizeof(struct pvr_page_table_l2_entry_raw) * 8 == + ROGUE_MMUCTRL_ENTRY_SIZE_PC_VALUE); + +static bool +pvr_page_table_l2_entry_raw_is_valid(struct pvr_page_table_l2_entry_raw entry) +{ + return PVR_PAGE_TABLE_FIELD_GET(2, PC, VALID, entry); +} + +/** + * pvr_page_table_l2_entry_raw_set() - Write a valid entry into a raw level 2 + * page table. + * @entry: Target raw level 2 page table entry. + * @child_table_dma_addr: DMA address of the level 1 page table to be + * associated with @entry. + * + * When calling this function, @child_table_dma_addr must be a valid DMA + * address and a multiple of %ROGUE_MMUCTRL_PC_DATA_PD_BASE_ALIGNSIZE. + */ +static void +pvr_page_table_l2_entry_raw_set(struct pvr_page_table_l2_entry_raw *entry, + dma_addr_t child_table_dma_addr) +{ + child_table_dma_addr >>= ROGUE_MMUCTRL_PC_DATA_PD_BASE_ALIGNSHIFT; + + WRITE_ONCE(entry->val, + PVR_PAGE_TABLE_FIELD_PREP(2, PC, VALID, true) | + PVR_PAGE_TABLE_FIELD_PREP(2, PC, ENTRY_PENDING, false) | + PVR_PAGE_TABLE_FIELD_PREP(2, PC, PD_BASE, child_table_dma_addr)); +} + +static void +pvr_page_table_l2_entry_raw_clear(struct pvr_page_table_l2_entry_raw *entry) +{ + WRITE_ONCE(entry->val, 0); +} + +/** + * struct pvr_page_table_l1_entry_raw - A single entry in a level 1 page table. + * @val: The raw value of this entry. + * + * This type is a structure for type-checking purposes. At compile-time, its + * size is checked against %ROGUE_MMUCTRL_ENTRY_SIZE_PD_VALUE. + * + * The value stored in this structure can be decoded using the following bitmap: + * + * .. flat-table:: + * :widths: 1 5 + * :stub-columns: 1 + * + * * - 63..41 + * - *(reserved)* + * + * * - 40 + * - **Pending:** When valid bit is not set, indicates that a valid entry + * is pending and the MMU should wait for the driver to map the entry. + * This is used to support page demand mapping of memory. + * + * * - 39..5 + * - **Level 0 Page Table Base Address:** The way this value is + * interpreted depends on the page size. Bits not specified in the + * table below (e.g. bits 11..5 for page size 4KiB) should be + * considered reserved. + * + * This table shows the bits used in an L1 page table entry to + * represent the Physical Table Base Address for a given Page Size. + * Since each L1 page table entry covers 2MiB of address space, the + * maximum page size is 2MiB. + * + * .. flat-table:: + * :widths: 1 1 1 1 + * :header-rows: 1 + * :stub-columns: 1 + * + * * - Page size + * - L0 page table base address bits + * - Number of L0 page table entries + * - Size of L0 page table + * + * * - 4KiB + * - 39..12 + * - 512 + * - 4KiB + * + * * - 16KiB + * - 39..10 + * - 128 + * - 1KiB + * + * * - 64KiB + * - 39..8 + * - 32 + * - 256B + * + * * - 256KiB + * - 39..6 + * - 8 + * - 64B + * + * * - 1MiB + * - 39..5 (4 = '0') + * - 2 + * - 16B + * + * * - 2MiB + * - 39..5 (4..3 = '00') + * - 1 + * - 8B + * + * * - 4 + * - *(reserved)* + * + * * - 3..1 + * - **Page Size:** Sets the page size, from 4KiB to 2MiB. + * + * * - 0 + * - **Valid:** Indicates that the entry contains a valid L0 page table. + * If the valid bit is not set, then an attempted use of the page would + * result in a page fault. + */ +struct pvr_page_table_l1_entry_raw { + u64 val; +} __packed; +static_assert(sizeof(struct pvr_page_table_l1_entry_raw) * 8 == + ROGUE_MMUCTRL_ENTRY_SIZE_PD_VALUE); + +static bool +pvr_page_table_l1_entry_raw_is_valid(struct pvr_page_table_l1_entry_raw entry) +{ + return PVR_PAGE_TABLE_FIELD_GET(1, PD, VALID, entry); +} + +/** + * pvr_page_table_l1_entry_raw_set() - Write a valid entry into a raw level 1 + * page table. + * @entry: Target raw level 1 page table entry. + * @child_table_dma_addr: DMA address of the level 0 page table to be + * associated with @entry. + * + * When calling this function, @child_table_dma_addr must be a valid DMA + * address and a multiple of 4 KiB. + */ +static void +pvr_page_table_l1_entry_raw_set(struct pvr_page_table_l1_entry_raw *entry, + dma_addr_t child_table_dma_addr) +{ + WRITE_ONCE(entry->val, + PVR_PAGE_TABLE_FIELD_PREP(1, PD, VALID, true) | + PVR_PAGE_TABLE_FIELD_PREP(1, PD, ENTRY_PENDING, false) | + PVR_PAGE_TABLE_FIELD_PREP(1, PD, PAGE_SIZE, ROGUE_MMUCTRL_PAGE_SIZE_X) | + /* + * The use of a 4K-specific macro here is correct. It is + * a future optimization to allocate sub-host-page-sized + * blocks for individual tables, so the condition that any + * page table address is aligned to the size of the + * largest (a 4KB) table currently holds. + */ + (child_table_dma_addr & ~ROGUE_MMUCTRL_PT_BASE_4KB_RANGE_CLRMSK)); +} + +static void +pvr_page_table_l1_entry_raw_clear(struct pvr_page_table_l1_entry_raw *entry) +{ + WRITE_ONCE(entry->val, 0); +} + +/** + * struct pvr_page_table_l0_entry_raw - A single entry in a level 0 page table. + * @val: The raw value of this entry. + * + * This type is a structure for type-checking purposes. At compile-time, its + * size is checked against %ROGUE_MMUCTRL_ENTRY_SIZE_PT_VALUE. + * + * The value stored in this structure can be decoded using the following bitmap: + * + * .. flat-table:: + * :widths: 1 5 + * :stub-columns: 1 + * + * * - 63 + * - *(reserved)* + * + * * - 62 + * - **PM/FW Protect:** Indicates a protected region which only the + * Parameter Manager (PM) or firmware processor can write to. + * + * * - 61..40 + * - **VP Page (High):** Virtual-physical page used for Parameter Manager + * (PM) memory. This field is only used if the additional level of PB + * virtualization is enabled. The VP Page field is needed by the PM in + * order to correctly reconstitute the free lists after render + * completion. This (High) field holds bits 39..18 of the value; the + * Low field holds bits 17..12. Bits 11..0 are always zero because the + * value is always aligned to the 4KiB page size. + * + * * - 39..12 + * - **Physical Page Address:** The way this value is interpreted depends + * on the page size. Bits not specified in the table below (e.g. bits + * 20..12 for page size 2MiB) should be considered reserved. + * + * This table shows the bits used in an L0 page table entry to represent + * the Physical Page Address for a given page size (as defined in the + * associated L1 page table entry). + * + * .. flat-table:: + * :widths: 1 1 + * :header-rows: 1 + * :stub-columns: 1 + * + * * - Page size + * - Physical address bits + * + * * - 4KiB + * - 39..12 + * + * * - 16KiB + * - 39..14 + * + * * - 64KiB + * - 39..16 + * + * * - 256KiB + * - 39..18 + * + * * - 1MiB + * - 39..20 + * + * * - 2MiB + * - 39..21 + * + * * - 11..6 + * - **VP Page (Low):** Continuation of VP Page (High). + * + * * - 5 + * - **Pending:** When valid bit is not set, indicates that a valid entry + * is pending and the MMU should wait for the driver to map the entry. + * This is used to support page demand mapping of memory. + * + * * - 4 + * - **PM Src:** Set on Parameter Manager (PM) allocated page table + * entries when indicated by the PM. Note that this bit will only be set + * by the PM, not by the device driver. + * + * * - 3 + * - **SLC Bypass Control:** Specifies requests to this page should bypass + * the System Level Cache (SLC), if enabled in SLC configuration. + * + * * - 2 + * - **Cache Coherency:** Indicates that the page is coherent (i.e. it + * does not require a cache flush between operations on the CPU and the + * device). + * + * * - 1 + * - **Read Only:** If set, this bit indicates that the page is read only. + * An attempted write to this page would result in a write-protection + * fault. + * + * * - 0 + * - **Valid:** Indicates that the entry contains a valid page. If the + * valid bit is not set, then an attempted use of the page would result + * in a page fault. + */ +struct pvr_page_table_l0_entry_raw { + u64 val; +} __packed; +static_assert(sizeof(struct pvr_page_table_l0_entry_raw) * 8 == + ROGUE_MMUCTRL_ENTRY_SIZE_PT_VALUE); + +/** + * struct pvr_page_flags_raw - The configurable flags from a single entry in a + * level 0 page table. + * @val: The raw value of these flags. Since these are a strict subset of + * &struct pvr_page_table_l0_entry_raw; use that type for our member here. + * + * The flags stored in this type are: PM/FW Protect; SLC Bypass Control; Cache + * Coherency, and Read Only (bits 62, 3, 2 and 1 respectively). + * + * This type should never be instantiated directly; instead use + * pvr_page_flags_raw_create() to ensure only valid bits of @val are set. + */ +struct pvr_page_flags_raw { + struct pvr_page_table_l0_entry_raw val; +} __packed; +static_assert(sizeof(struct pvr_page_flags_raw) == + sizeof(struct pvr_page_table_l0_entry_raw)); + +static bool +pvr_page_table_l0_entry_raw_is_valid(struct pvr_page_table_l0_entry_raw entry) +{ + return PVR_PAGE_TABLE_FIELD_GET(0, PT, VALID, entry); +} + +/** + * pvr_page_table_l0_entry_raw_set() - Write a valid entry into a raw level 0 + * page table. + * @entry: Target raw level 0 page table entry. + * @dma_addr: DMA address of the physical page to be associated with @entry. + * @flags: Options to be set on @entry. + * + * When calling this function, @child_table_dma_addr must be a valid DMA + * address and a multiple of %PVR_DEVICE_PAGE_SIZE. + * + * The @flags parameter is directly assigned into @entry. It is the callers + * responsibility to ensure that only bits specified in + * &struct pvr_page_flags_raw are set in @flags. + */ +static void +pvr_page_table_l0_entry_raw_set(struct pvr_page_table_l0_entry_raw *entry, + dma_addr_t dma_addr, + struct pvr_page_flags_raw flags) +{ + WRITE_ONCE(entry->val, PVR_PAGE_TABLE_FIELD_PREP(0, PT, VALID, true) | + PVR_PAGE_TABLE_FIELD_PREP(0, PT, ENTRY_PENDING, false) | + (dma_addr & ~ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK) | + flags.val.val); +} + +static void +pvr_page_table_l0_entry_raw_clear(struct pvr_page_table_l0_entry_raw *entry) +{ + WRITE_ONCE(entry->val, 0); +} + +/** + * pvr_page_flags_raw_create() - Initialize the flag bits of a raw level 0 page + * table entry. + * @read_only: This page is read-only (see: Read Only). + * @cache_coherent: This page does not require cache flushes (see: Cache + * Coherency). + * @slc_bypass: This page bypasses the device cache (see: SLC Bypass Control). + * @pm_fw_protect: This page is only for use by the firmware or Parameter + * Manager (see PM/FW Protect). + * + * For more details on the use of these four options, see their respective + * entries in the table under &struct pvr_page_table_l0_entry_raw. + * + * Return: + * A new &struct pvr_page_flags_raw instance which can be passed directly to + * pvr_page_table_l0_entry_raw_set() or pvr_page_table_l0_insert(). + */ +static struct pvr_page_flags_raw +pvr_page_flags_raw_create(bool read_only, bool cache_coherent, bool slc_bypass, + bool pm_fw_protect) +{ + struct pvr_page_flags_raw flags; + + flags.val.val = + PVR_PAGE_TABLE_FIELD_PREP(0, PT, READ_ONLY, read_only) | + PVR_PAGE_TABLE_FIELD_PREP(0, PT, CC, cache_coherent) | + PVR_PAGE_TABLE_FIELD_PREP(0, PT, SLC_BYPASS_CTRL, slc_bypass) | + PVR_PAGE_TABLE_FIELD_PREP(0, PT, PM_META_PROTECT, pm_fw_protect); + + return flags; +} + +/** + * struct pvr_page_table_l2_raw - The raw data of a level 2 page table. + * + * This type is a structure for type-checking purposes. At compile-time, its + * size is checked against %PVR_MMU_BACKING_PAGE_SIZE. + */ +struct pvr_page_table_l2_raw { + /** @entries: The raw values of this table. */ + struct pvr_page_table_l2_entry_raw + entries[ROGUE_MMUCTRL_ENTRIES_PC_VALUE]; +} __packed; +static_assert(sizeof(struct pvr_page_table_l2_raw) == PVR_MMU_BACKING_PAGE_SIZE); + +/** + * struct pvr_page_table_l1_raw - The raw data of a level 1 page table. + * + * This type is a structure for type-checking purposes. At compile-time, its + * size is checked against %PVR_MMU_BACKING_PAGE_SIZE. + */ +struct pvr_page_table_l1_raw { + /** @entries: The raw values of this table. */ + struct pvr_page_table_l1_entry_raw + entries[ROGUE_MMUCTRL_ENTRIES_PD_VALUE]; +} __packed; +static_assert(sizeof(struct pvr_page_table_l1_raw) == PVR_MMU_BACKING_PAGE_SIZE); + +/** + * struct pvr_page_table_l0_raw - The raw data of a level 0 page table. + * + * This type is a structure for type-checking purposes. At compile-time, its + * size is checked against %PVR_MMU_BACKING_PAGE_SIZE. + * + * .. caution:: + * + * The size of level 0 page tables is variable depending on the page size + * specified in the associated level 1 page table entry. Since the device + * page size in use is pegged to the host page size, it cannot vary at + * runtime. This structure is therefore only defined to contain the required + * number of entries for the current device page size. **You should never + * read or write beyond the last supported entry.** + */ +struct pvr_page_table_l0_raw { + /** @entries: The raw values of this table. */ + struct pvr_page_table_l0_entry_raw + entries[ROGUE_MMUCTRL_ENTRIES_PT_VALUE_X]; +} __packed; +static_assert(sizeof(struct pvr_page_table_l0_raw) <= PVR_MMU_BACKING_PAGE_SIZE); + +/** + * DOC: Mirror page tables + */ + +/* + * We pre-declare these types because they cross-depend on pointers to each + * other. + */ +struct pvr_page_table_l1; +struct pvr_page_table_l0; + +/** + * struct pvr_page_table_l2 - A wrapped level 2 page table. + * + * To access the raw part of this table, use pvr_page_table_l2_get_raw(). + * Alternatively to access a raw entry directly, use + * pvr_page_table_l2_get_entry_raw(). + * + * A level 2 page table forms the root of the page table tree structure, so + * this type has no &parent or &parent_idx members. + */ +struct pvr_page_table_l2 { + /** + * @entries: The children of this node in the page table tree + * structure. These are also mirror tables. The indexing of this array + * is identical to that of the raw equivalent + * (&pvr_page_table_l1_raw.entries). + */ + struct pvr_page_table_l1 *entries[ROGUE_MMUCTRL_ENTRIES_PC_VALUE]; + + /** + * @backing_page: A handle to the memory which holds the raw + * equivalent of this table. **For internal use only.** + */ + struct pvr_mmu_backing_page backing_page; + + /** + * @entry_count: The current number of valid entries (that we know of) + * in this table. This value is essentially a refcount - the table is + * destroyed when this value is decremented to zero by + * pvr_page_table_l2_remove(). + */ + u16 entry_count; +}; + +/** + * pvr_page_table_l2_init() - Initialize a level 2 page table. + * @table: Target level 2 page table. + * @pvr_dev: Target PowerVR device + * + * It is expected that @table be zeroed (e.g. from kzalloc()) before calling + * this function. + * + * Return: + * * 0 on success, or + * * Any error encountered while intializing &table->backing_page using + * pvr_mmu_backing_page_init(). + */ +static int +pvr_page_table_l2_init(struct pvr_page_table_l2 *table, + struct pvr_device *pvr_dev) +{ + return pvr_mmu_backing_page_init(&table->backing_page, pvr_dev); +} + +/** + * pvr_page_table_l2_fini() - Teardown a level 2 page table. + * @table: Target level 2 page table. + * + * It is an error to attempt to use @table after calling this function. + */ +static void +pvr_page_table_l2_fini(struct pvr_page_table_l2 *table) +{ + pvr_mmu_backing_page_fini(&table->backing_page); +} + +/** + * pvr_page_table_l2_sync() - Flush a level 2 page table from the CPU to the + * device. + * @table: Target level 2 page table. + * + * This is just a thin wrapper around pvr_mmu_backing_page_sync(), so the + * warning there applies here too: **Only call pvr_page_table_l2_sync() once + * you're sure you have no more changes to make to** @table **in the immediate + * future.** + * + * If child level 1 page tables of @table also need to be flushed, this should + * be done first using pvr_page_table_l1_sync() *before* calling this function. + */ +static void +pvr_page_table_l2_sync(struct pvr_page_table_l2 *table) +{ + pvr_mmu_backing_page_sync(&table->backing_page); +} + +/** + * pvr_page_table_l2_get_raw() - Access the raw equivalent of a mirror level 2 + * page table. + * @table: Target level 2 page table. + * + * Essentially returns the CPU address of the raw equivalent of @table, cast to + * a &struct pvr_page_table_l2_raw pointer. + * + * You probably want to call pvr_page_table_l2_get_entry_raw() instead. + * + * Return: + * The raw equivalent of @table. + */ +static struct pvr_page_table_l2_raw * +pvr_page_table_l2_get_raw(struct pvr_page_table_l2 *table) +{ + return table->backing_page.host_ptr; +} + +/** + * pvr_page_table_l2_get_entry_raw() - Access an entry from the raw equivalent + * of a mirror level 2 page table. + * @table: Target level 2 page table. + * @idx: Index of the entry to access. + * + * Technically this function returns a pointer to a slot in a raw level 2 page + * table, since the returned "entry" is not guaranteed to be valid. The caller + * must verify the validity of the entry at the returned address (perhaps using + * pvr_page_table_l2_entry_raw_is_valid()) before reading or overwriting it. + * + * The value of @idx is not checked here; it is the callers responsibility to + * ensure @idx refers to a valid index within @table before dereferencing the + * returned pointer. + * + * Return: + * A pointer to the requested raw level 2 page table entry. + */ +static struct pvr_page_table_l2_entry_raw * +pvr_page_table_l2_get_entry_raw(struct pvr_page_table_l2 *table, u16 idx) +{ + return &pvr_page_table_l2_get_raw(table)->entries[idx]; +} + +/** + * pvr_page_table_l2_entry_is_valid() - Check if a level 2 page table entry is + * marked as valid. + * @table: Target level 2 page table. + * @idx: Index of the entry to check. + * + * The value of @idx is not checked here; it is the callers responsibility to + * ensure @idx refers to a valid index within @table before calling this + * function. + */ +static bool +pvr_page_table_l2_entry_is_valid(struct pvr_page_table_l2 *table, u16 idx) +{ + struct pvr_page_table_l2_entry_raw entry_raw = + *pvr_page_table_l2_get_entry_raw(table, idx); + + return pvr_page_table_l2_entry_raw_is_valid(entry_raw); +} + +/** + * struct pvr_page_table_l1 - A wrapped level 1 page table. + * + * To access the raw part of this table, use pvr_page_table_l1_get_raw(). + * Alternatively to access a raw entry directly, use + * pvr_page_table_l1_get_entry_raw(). + */ +struct pvr_page_table_l1 { + /** + * @entries: The children of this node in the page table tree + * structure. These are also mirror tables. The indexing of this array + * is identical to that of the raw equivalent + * (&pvr_page_table_l0_raw.entries). + */ + struct pvr_page_table_l0 *entries[ROGUE_MMUCTRL_ENTRIES_PD_VALUE]; + + /** + * @backing_page: A handle to the memory which holds the raw + * equivalent of this table. **For internal use only.** + */ + struct pvr_mmu_backing_page backing_page; + + union { + /** + * @parent: The parent of this node in the page table tree structure. + * + * This is also a mirror table. + * + * Only valid when the L1 page table is active. When the L1 page table + * has been removed and queued for destruction, the next_free field + * should be used instead. + */ + struct pvr_page_table_l2 *parent; + + /** + * @next_free: Pointer to the next L1 page table to take/free. + * + * Used to form a linked list of L1 page tables. This is used + * when preallocating tables and when the page table has been + * removed and queued for destruction. + */ + struct pvr_page_table_l1 *next_free; + }; + + /** + * @parent_idx: The index of the entry in the parent table (see + * @parent) which corresponds to this table. + */ + u16 parent_idx; + + /** + * @entry_count: The current number of valid entries (that we know of) + * in this table. This value is essentially a refcount - the table is + * destroyed when this value is decremented to zero by + * pvr_page_table_l1_remove(). + */ + u16 entry_count; +}; + +/** + * pvr_page_table_l1_init() - Initialize a level 1 page table. + * @table: Target level 1 page table. + * @pvr_dev: Target PowerVR device + * + * When this function returns successfully, @table is still not considered + * valid. It must be inserted into the page table tree structure with + * pvr_page_table_l2_insert() before it is ready for use. + * + * It is expected that @table be zeroed (e.g. from kzalloc()) before calling + * this function. + * + * Return: + * * 0 on success, or + * * Any error encountered while intializing &table->backing_page using + * pvr_mmu_backing_page_init(). + */ +static int +pvr_page_table_l1_init(struct pvr_page_table_l1 *table, + struct pvr_device *pvr_dev) +{ + table->parent_idx = PVR_IDX_INVALID; + + return pvr_mmu_backing_page_init(&table->backing_page, pvr_dev); +} + +/** + * pvr_page_table_l1_free() - Teardown a level 1 page table. + * @table: Target level 1 page table. + * + * It is an error to attempt to use @table after calling this function, even + * indirectly. This includes calling pvr_page_table_l2_remove(), which must + * be called *before* pvr_page_table_l1_free(). + */ +static void +pvr_page_table_l1_free(struct pvr_page_table_l1 *table) +{ + pvr_mmu_backing_page_fini(&table->backing_page); + kfree(table); +} + +/** + * pvr_page_table_l1_sync() - Flush a level 1 page table from the CPU to the + * device. + * @table: Target level 1 page table. + * + * This is just a thin wrapper around pvr_mmu_backing_page_sync(), so the + * warning there applies here too: **Only call pvr_page_table_l1_sync() once + * you're sure you have no more changes to make to** @table **in the immediate + * future.** + * + * If child level 0 page tables of @table also need to be flushed, this should + * be done first using pvr_page_table_l0_sync() *before* calling this function. + */ +static void +pvr_page_table_l1_sync(struct pvr_page_table_l1 *table) +{ + pvr_mmu_backing_page_sync(&table->backing_page); +} + +/** + * pvr_page_table_l1_get_raw() - Access the raw equivalent of a mirror level 1 + * page table. + * @table: Target level 1 page table. + * + * Essentially returns the CPU address of the raw equivalent of @table, cast to + * a &struct pvr_page_table_l1_raw pointer. + * + * You probably want to call pvr_page_table_l1_get_entry_raw() instead. + * + * Return: + * The raw equivalent of @table. + */ +static struct pvr_page_table_l1_raw * +pvr_page_table_l1_get_raw(struct pvr_page_table_l1 *table) +{ + return table->backing_page.host_ptr; +} + +/** + * pvr_page_table_l1_get_entry_raw() - Access an entry from the raw equivalent + * of a mirror level 1 page table. + * @table: Target level 1 page table. + * @idx: Index of the entry to access. + * + * Technically this function returns a pointer to a slot in a raw level 1 page + * table, since the returned "entry" is not guaranteed to be valid. The caller + * must verify the validity of the entry at the returned address (perhaps using + * pvr_page_table_l1_entry_raw_is_valid()) before reading or overwriting it. + * + * The value of @idx is not checked here; it is the callers responsibility to + * ensure @idx refers to a valid index within @table before dereferencing the + * returned pointer. + * + * Return: + * A pointer to the requested raw level 1 page table entry. + */ +static struct pvr_page_table_l1_entry_raw * +pvr_page_table_l1_get_entry_raw(struct pvr_page_table_l1 *table, u16 idx) +{ + return &pvr_page_table_l1_get_raw(table)->entries[idx]; +} + +/** + * pvr_page_table_l1_entry_is_valid() - Check if a level 1 page table entry is + * marked as valid. + * @table: Target level 1 page table. + * @idx: Index of the entry to check. + * + * The value of @idx is not checked here; it is the callers responsibility to + * ensure @idx refers to a valid index within @table before calling this + * function. + */ +static bool +pvr_page_table_l1_entry_is_valid(struct pvr_page_table_l1 *table, u16 idx) +{ + struct pvr_page_table_l1_entry_raw entry_raw = + *pvr_page_table_l1_get_entry_raw(table, idx); + + return pvr_page_table_l1_entry_raw_is_valid(entry_raw); +} + +/** + * struct pvr_page_table_l0 - A wrapped level 0 page table. + * + * To access the raw part of this table, use pvr_page_table_l0_get_raw(). + * Alternatively to access a raw entry directly, use + * pvr_page_table_l0_get_entry_raw(). + * + * There is no mirror representation of an individual page, so this type has no + * &entries member. + */ +struct pvr_page_table_l0 { + /** + * @backing_page: A handle to the memory which holds the raw + * equivalent of this table. **For internal use only.** + */ + struct pvr_mmu_backing_page backing_page; + + union { + /** + * @parent: The parent of this node in the page table tree structure. + * + * This is also a mirror table. + * + * Only valid when the L0 page table is active. When the L0 page table + * has been removed and queued for destruction, the next_free field + * should be used instead. + */ + struct pvr_page_table_l1 *parent; + + /** + * @next_free: Pointer to the next L0 page table to take/free. + * + * Used to form a linked list of L0 page tables. This is used + * when preallocating tables and when the page table has been + * removed and queued for destruction. + */ + struct pvr_page_table_l0 *next_free; + }; + + /** + * @parent_idx: The index of the entry in the parent table (see + * @parent) which corresponds to this table. + */ + u16 parent_idx; + + /** + * @entry_count: The current number of valid entries (that we know of) + * in this table. This value is essentially a refcount - the table is + * destroyed when this value is decremented to zero by + * pvr_page_table_l0_remove(). + */ + u16 entry_count; +}; + +/** + * pvr_page_table_l0_init() - Initialize a level 0 page table. + * @table: Target level 0 page table. + * @pvr_dev: Target PowerVR device + * + * When this function returns successfully, @table is still not considered + * valid. It must be inserted into the page table tree structure with + * pvr_page_table_l1_insert() before it is ready for use. + * + * It is expected that @table be zeroed (e.g. from kzalloc()) before calling + * this function. + * + * Return: + * * 0 on success, or + * * Any error encountered while intializing &table->backing_page using + * pvr_mmu_backing_page_init(). + */ +static int +pvr_page_table_l0_init(struct pvr_page_table_l0 *table, + struct pvr_device *pvr_dev) +{ + table->parent_idx = PVR_IDX_INVALID; + + return pvr_mmu_backing_page_init(&table->backing_page, pvr_dev); +} + +/** + * pvr_page_table_l0_free() - Teardown a level 0 page table. + * @table: Target level 0 page table. + * + * It is an error to attempt to use @table after calling this function, even + * indirectly. This includes calling pvr_page_table_l1_remove(), which must + * be called *before* pvr_page_table_l0_free(). + */ +static void +pvr_page_table_l0_free(struct pvr_page_table_l0 *table) +{ + pvr_mmu_backing_page_fini(&table->backing_page); + kfree(table); +} + +/** + * pvr_page_table_l0_sync() - Flush a level 0 page table from the CPU to the + * device. + * @table: Target level 0 page table. + * + * This is just a thin wrapper around pvr_mmu_backing_page_sync(), so the + * warning there applies here too: **Only call pvr_page_table_l0_sync() once + * you're sure you have no more changes to make to** @table **in the immediate + * future.** + * + * If child pages of @table also need to be flushed, this should be done first + * using a DMA sync function (e.g. dma_sync_sg_for_device()) *before* calling + * this function. + */ +static void +pvr_page_table_l0_sync(struct pvr_page_table_l0 *table) +{ + pvr_mmu_backing_page_sync(&table->backing_page); +} + +/** + * pvr_page_table_l0_get_raw() - Access the raw equivalent of a mirror level 0 + * page table. + * @table: Target level 0 page table. + * + * Essentially returns the CPU address of the raw equivalent of @table, cast to + * a &struct pvr_page_table_l0_raw pointer. + * + * You probably want to call pvr_page_table_l0_get_entry_raw() instead. + * + * Return: + * The raw equivalent of @table. + */ +static struct pvr_page_table_l0_raw * +pvr_page_table_l0_get_raw(struct pvr_page_table_l0 *table) +{ + return table->backing_page.host_ptr; +} + +/** + * pvr_page_table_l0_get_entry_raw() - Access an entry from the raw equivalent + * of a mirror level 0 page table. + * @table: Target level 0 page table. + * @idx: Index of the entry to access. + * + * Technically this function returns a pointer to a slot in a raw level 0 page + * table, since the returned "entry" is not guaranteed to be valid. The caller + * must verify the validity of the entry at the returned address (perhaps using + * pvr_page_table_l0_entry_raw_is_valid()) before reading or overwriting it. + * + * The value of @idx is not checked here; it is the callers responsibility to + * ensure @idx refers to a valid index within @table before dereferencing the + * returned pointer. This is espcially important for level 0 page tables, which + * can have a variable number of entries. + * + * Return: + * A pointer to the requested raw level 0 page table entry. + */ +static struct pvr_page_table_l0_entry_raw * +pvr_page_table_l0_get_entry_raw(struct pvr_page_table_l0 *table, u16 idx) +{ + return &pvr_page_table_l0_get_raw(table)->entries[idx]; +} + +/** + * pvr_page_table_l0_entry_is_valid() - Check if a level 0 page table entry is + * marked as valid. + * @table: Target level 0 page table. + * @idx: Index of the entry to check. + * + * The value of @idx is not checked here; it is the callers responsibility to + * ensure @idx refers to a valid index within @table before calling this + * function. + */ +static bool +pvr_page_table_l0_entry_is_valid(struct pvr_page_table_l0 *table, u16 idx) +{ + struct pvr_page_table_l0_entry_raw entry_raw = + *pvr_page_table_l0_get_entry_raw(table, idx); + + return pvr_page_table_l0_entry_raw_is_valid(entry_raw); +} + +/** + * struct pvr_mmu_context - context holding data for operations at page + * catalogue level, intended for use with a VM context. + */ +struct pvr_mmu_context { + /** @pvr_dev: The PVR device associated with the owning VM context. */ + struct pvr_device *pvr_dev; + + /** @page_table_l2: The MMU table root. */ + struct pvr_page_table_l2 page_table_l2; +}; + +enum pvr_mmu_sync_level { + PVR_MMU_SYNC_LEVEL_NONE = -1, + PVR_MMU_SYNC_LEVEL_0 = 0, + PVR_MMU_SYNC_LEVEL_1 = 1, + PVR_MMU_SYNC_LEVEL_2 = 2, +}; + +/** + * struct pvr_page_table_ptr - A reference to a single physical page as indexed + * by the page table structure. + * + * Intended for embedding in a &struct pvr_mmu_op_context. + */ +struct pvr_page_table_ptr { + /** + * @l1_table: A cached handle to the level 1 page table the + * context is currently traversing. + */ + struct pvr_page_table_l1 *l1_table; + + /** + * @l0_table: A cached handle to the level 0 page table the + * context is currently traversing. + */ + struct pvr_page_table_l0 *l0_table; + + /** + * @l2_idx: Index into the level 2 page table the context is + * currently referencing. + */ + u16 l2_idx; + + /** + * @l1_idx: Index into the level 1 page table the context is + * currently referencing. + */ + u16 l1_idx; + + /** + * @l0_idx: Index into the level 0 page table the context is + * currently referencing. + */ + u16 l0_idx; +}; + +/** + * struct pvr_mmu_op_context - context holding data for individual + * device-virtual mapping operations. Intended for use with a VM bind operation. + */ +struct pvr_mmu_op_context { + /** @mmu_ctx: The MMU context associated with the owning VM context. */ + struct pvr_mmu_context *mmu_ctx; + + /** @map: Data specifically for map operations. */ + struct { + /** + * @sgt: Scatter gather table containing pages pinned for use by + * this context - these are currently pinned when initialising + * the VM bind operation. + */ + struct sg_table *sgt; + + /** @sgt_offset: Start address of the device-virtual mapping. */ + u64 sgt_offset; + + /** + * @l1_prealloc_tables: Preallocated l1 page table objects + * use by this context when creating a page mapping. Linked list + * fully created during initialisation. + */ + struct pvr_page_table_l1 *l1_prealloc_tables; + + /** + * @l0_prealloc_tables: Preallocated l0 page table objects + * use by this context when creating a page mapping. Linked list + * fully created during initialisation. + */ + struct pvr_page_table_l0 *l0_prealloc_tables; + } map; + + /** @unmap: Data specifically for unmap operations. */ + struct { + /** + * @l1_free_tables: Collects page table objects freed by unmap + * ops. Linked list empty at creation. + */ + struct pvr_page_table_l1 *l1_free_tables; + + /** + * @l0_free_tables: Collects page table objects freed by unmap + * ops. Linked list empty at creation. + */ + struct pvr_page_table_l0 *l0_free_tables; + } unmap; + + /** + * @curr_page - A reference to a single physical page as indexed by + * the page table structure. + */ + struct pvr_page_table_ptr curr_page; + + /** + * @sync_level_required: The maximum level of the page table tree + * structure which has (possibly) been modified since it was last + * flushed to the device. + * + * This field should only be set with pvr_mmu_op_context_require_sync() + * or indirectly by pvr_mmu_op_context_sync_partial(). + */ + enum pvr_mmu_sync_level sync_level_required; +}; + +/** + * pvr_page_table_l2_insert() - Insert an entry referring to a level 1 page + * table into a level 2 page table. + * @op_ctx: Target MMU op context pointing at the entry to insert the L1 page + * table into. + * @child_table: Target level 1 page table to be referenced by the new entry. + * + * It is the caller's responsibility to ensure @op_ctx.curr_page points to a + * valid L2 entry. + * + * It is the caller's responsibility to execute any memory barries to ensure + * that the creation of @child_table is ordered before the L2 entry is inserted. + */ +static void +pvr_page_table_l2_insert(struct pvr_mmu_op_context *op_ctx, + struct pvr_page_table_l1 *child_table) +{ + struct pvr_page_table_l2 *l2_table = + &op_ctx->mmu_ctx->page_table_l2; + struct pvr_page_table_l2_entry_raw *entry_raw = + pvr_page_table_l2_get_entry_raw(l2_table, + op_ctx->curr_page.l2_idx); + + pvr_page_table_l2_entry_raw_set(entry_raw, + child_table->backing_page.dma_addr); + + child_table->parent = l2_table; + child_table->parent_idx = op_ctx->curr_page.l2_idx; + l2_table->entries[op_ctx->curr_page.l2_idx] = child_table; + ++l2_table->entry_count; + op_ctx->curr_page.l1_table = child_table; +} + +/** + * pvr_page_table_l2_remove() - Remove a level 1 page table from a level 2 page + * table. + * @op_ctx: Target MMU op context pointing at the L2 entry to remove. + * + * It is the caller's responsibility to ensure @op_ctx.curr_page points to a + * valid L2 entry. + */ +static void +pvr_page_table_l2_remove(struct pvr_mmu_op_context *op_ctx) +{ + struct pvr_page_table_l2 *l2_table = + &op_ctx->mmu_ctx->page_table_l2; + struct pvr_page_table_l2_entry_raw *entry_raw = + pvr_page_table_l2_get_entry_raw(l2_table, + op_ctx->curr_page.l1_table->parent_idx); + + WARN_ON(op_ctx->curr_page.l1_table->parent != l2_table); + + pvr_page_table_l2_entry_raw_clear(entry_raw); + + l2_table->entries[op_ctx->curr_page.l1_table->parent_idx] = NULL; + op_ctx->curr_page.l1_table->parent_idx = PVR_IDX_INVALID; + op_ctx->curr_page.l1_table->next_free = op_ctx->unmap.l1_free_tables; + op_ctx->unmap.l1_free_tables = op_ctx->curr_page.l1_table; + op_ctx->curr_page.l1_table = NULL; + + --l2_table->entry_count; +} + +/** + * pvr_page_table_l1_insert() - Insert an entry referring to a level 0 page + * table into a level 1 page table. + * @op_ctx: Target MMU op context pointing at the entry to insert the L0 page + * table into. + * @child_table: L0 page table to insert. + * + * It is the caller's responsibility to ensure @op_ctx.curr_page points to a + * valid L1 entry. + * + * It is the caller's responsibility to execute any memory barries to ensure + * that the creation of @child_table is ordered before the L1 entry is inserted. + */ +static void +pvr_page_table_l1_insert(struct pvr_mmu_op_context *op_ctx, + struct pvr_page_table_l0 *child_table) +{ + struct pvr_page_table_l1_entry_raw *entry_raw = + pvr_page_table_l1_get_entry_raw(op_ctx->curr_page.l1_table, + op_ctx->curr_page.l1_idx); + + pvr_page_table_l1_entry_raw_set(entry_raw, + child_table->backing_page.dma_addr); + + child_table->parent = op_ctx->curr_page.l1_table; + child_table->parent_idx = op_ctx->curr_page.l1_idx; + op_ctx->curr_page.l1_table->entries[op_ctx->curr_page.l1_idx] = child_table; + ++op_ctx->curr_page.l1_table->entry_count; + op_ctx->curr_page.l0_table = child_table; +} + +/** + * pvr_page_table_l1_remove() - Remove a level 0 page table from a level 1 page + * table. + * @op_ctx: Target MMU op context pointing at the L1 entry to remove. + * + * If this function results in the L1 table becoming empty, it will be removed + * from its parent level 2 page table and destroyed. + * + * It is the caller's responsibility to ensure @op_ctx.curr_page points to a + * valid L1 entry. + */ +static void +pvr_page_table_l1_remove(struct pvr_mmu_op_context *op_ctx) +{ + struct pvr_page_table_l1_entry_raw *entry_raw = + pvr_page_table_l1_get_entry_raw(op_ctx->curr_page.l0_table->parent, + op_ctx->curr_page.l0_table->parent_idx); + + WARN_ON(op_ctx->curr_page.l0_table->parent != + op_ctx->curr_page.l1_table); + + pvr_page_table_l1_entry_raw_clear(entry_raw); + + op_ctx->curr_page.l1_table->entries[op_ctx->curr_page.l0_table->parent_idx] = NULL; + op_ctx->curr_page.l0_table->parent_idx = PVR_IDX_INVALID; + op_ctx->curr_page.l0_table->next_free = op_ctx->unmap.l0_free_tables; + op_ctx->unmap.l0_free_tables = op_ctx->curr_page.l0_table; + op_ctx->curr_page.l0_table = NULL; + + if (--op_ctx->curr_page.l1_table->entry_count == 0) { + /* Clear the parent L2 page table entry. */ + if (op_ctx->curr_page.l1_table->parent_idx != PVR_IDX_INVALID) + pvr_page_table_l2_remove(op_ctx); + } +} + +/** + * pvr_page_table_l0_insert() - Insert an entry referring to a physical page + * into a level 0 page table. + * @op_ctx: Target MMU op context pointing at the L0 entry to insert. + * @dma_addr: Target DMA address to be referenced by the new entry. + * @flags: Page options to be stored in the new entry. + * + * It is the caller's responsibility to ensure @op_ctx.curr_page points to a + * valid L0 entry. + */ +static void +pvr_page_table_l0_insert(struct pvr_mmu_op_context *op_ctx, + dma_addr_t dma_addr, struct pvr_page_flags_raw flags) +{ + struct pvr_page_table_l0_entry_raw *entry_raw = + pvr_page_table_l0_get_entry_raw(op_ctx->curr_page.l0_table, + op_ctx->curr_page.l0_idx); + + pvr_page_table_l0_entry_raw_set(entry_raw, dma_addr, flags); + + /* + * There is no entry to set here - we don't keep a mirror of + * individual pages. + */ + + ++op_ctx->curr_page.l0_table->entry_count; +} + +/** + * pvr_page_table_l0_remove() - Remove a physical page from a level 0 page + * table. + * @op_ctx: Target MMU op context pointing at the L0 entry to remove. + * + * If this function results in the L0 table becoming empty, it will be removed + * from its parent L1 page table and destroyed. + * + * It is the caller's responsibility to ensure @op_ctx.curr_page points to a + * valid L0 entry. + */ +static void +pvr_page_table_l0_remove(struct pvr_mmu_op_context *op_ctx) +{ + struct pvr_page_table_l0_entry_raw *entry_raw = + pvr_page_table_l0_get_entry_raw(op_ctx->curr_page.l0_table, + op_ctx->curr_page.l0_idx); + + pvr_page_table_l0_entry_raw_clear(entry_raw); + + /* + * There is no entry to clear here - we don't keep a mirror of + * individual pages. + */ + + if (--op_ctx->curr_page.l0_table->entry_count == 0) { + /* Clear the parent L1 page table entry. */ + if (op_ctx->curr_page.l0_table->parent_idx != PVR_IDX_INVALID) + pvr_page_table_l1_remove(op_ctx); + } +} + +/** + * DOC: Page table index utilities + */ + +/** + * pvr_page_table_l2_idx() - Calculate the level 2 page table index for a + * device-virtual address. + * @device_addr: Target device-virtual address. + * + * This function does not perform any bounds checking - it is the caller's + * responsibility to ensure that @device_addr is valid before interpreting + * the result. + * + * Return: + * The index into a level 2 page table corresponding to @device_addr. + */ +static u16 +pvr_page_table_l2_idx(u64 device_addr) +{ + return (device_addr & ~ROGUE_MMUCTRL_VADDR_PC_INDEX_CLRMSK) >> + ROGUE_MMUCTRL_VADDR_PC_INDEX_SHIFT; +} + +/** + * pvr_page_table_l1_idx() - Calculate the level 1 page table index for a + * device-virtual address. + * @device_addr: Target device-virtual address. + * + * This function does not perform any bounds checking - it is the caller's + * responsibility to ensure that @device_addr is valid before interpreting + * the result. + * + * Return: + * The index into a level 1 page table corresponding to @device_addr. + */ +static u16 +pvr_page_table_l1_idx(u64 device_addr) +{ + return (device_addr & ~ROGUE_MMUCTRL_VADDR_PD_INDEX_CLRMSK) >> + ROGUE_MMUCTRL_VADDR_PD_INDEX_SHIFT; +} + +/** + * pvr_page_table_l0_idx() - Calculate the level 0 page table index for a + * device-virtual address. + * @device_addr: Target device-virtual address. + * + * This function does not perform any bounds checking - it is the caller's + * responsibility to ensure that @device_addr is valid before interpreting + * the result. + * + * Return: + * The index into a level 0 page table corresponding to @device_addr. + */ +static u16 +pvr_page_table_l0_idx(u64 device_addr) +{ + return (device_addr & ~ROGUE_MMUCTRL_VADDR_PT_INDEX_CLRMSK) >> + ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT; +} + +/** + * DOC: High-level page table operations + */ + +/** + * pvr_page_table_l1_get_or_insert() - Retrieves (optionally inserting if + * necessary) a level 1 page table from the specified level 2 page table entry. + * @op_ctx: Target MMU op context. + * @should_insert: [IN] Specifies whether new page tables should be inserted + * when empty page table entries are encountered during traversal. + * + * Return: + * * 0 on success, or + * + * If @should_insert is %false: + * * -%ENXIO if a level 1 page table would have been inserted. + * + * If @should_insert is %true: + * * Any error encountered while inserting the level 1 page table. + */ +static int +pvr_page_table_l1_get_or_insert(struct pvr_mmu_op_context *op_ctx, + bool should_insert) +{ + struct pvr_page_table_l2 *l2_table = + &op_ctx->mmu_ctx->page_table_l2; + struct pvr_page_table_l1 *table; + + if (pvr_page_table_l2_entry_is_valid(l2_table, + op_ctx->curr_page.l2_idx)) { + op_ctx->curr_page.l1_table = + l2_table->entries[op_ctx->curr_page.l2_idx]; + return 0; + } + + if (!should_insert) + return -ENXIO; + + /* Take a prealloced table. */ + table = op_ctx->map.l1_prealloc_tables; + if (!table) + return -ENOMEM; + + /* Pop */ + op_ctx->map.l1_prealloc_tables = table->next_free; + table->next_free = NULL; + + /* Ensure new table is fully written out before adding to L2 page table. */ + wmb(); + + pvr_page_table_l2_insert(op_ctx, table); + + return 0; +} + +/** + * pvr_page_table_l0_get_or_insert() - Retrieves (optionally inserting if + * necessary) a level 0 page table from the specified level 1 page table entry. + * @op_ctx: Target MMU op context. + * @should_insert: [IN] Specifies whether new page tables should be inserted + * when empty page table entries are encountered during traversal. + * + * Return: + * * 0 on success, + * + * If @should_insert is %false: + * * -%ENXIO if a level 0 page table would have been inserted. + * + * If @should_insert is %true: + * * Any error encountered while inserting the level 0 page table. + */ +static int +pvr_page_table_l0_get_or_insert(struct pvr_mmu_op_context *op_ctx, + bool should_insert) +{ + struct pvr_page_table_l0 *table; + + if (pvr_page_table_l1_entry_is_valid(op_ctx->curr_page.l1_table, + op_ctx->curr_page.l1_idx)) { + op_ctx->curr_page.l0_table = + op_ctx->curr_page.l1_table->entries[op_ctx->curr_page.l1_idx]; + return 0; + } + + if (!should_insert) + return -ENXIO; + + /* Take a prealloced table. */ + table = op_ctx->map.l0_prealloc_tables; + if (!table) + return -ENOMEM; + + /* Pop */ + op_ctx->map.l0_prealloc_tables = table->next_free; + table->next_free = NULL; + + /* Ensure new table is fully written out before adding to L1 page table. */ + wmb(); + + pvr_page_table_l1_insert(op_ctx, table); + + return 0; +} + +/** + * pvr_mmu_context_create() - Create an MMU context. + * @pvr_dev: PVR device associated with owning VM context. + * + * Returns: + * * Newly created MMU context object on success, or + * * -%ENOMEM if no memory is available, + * * Any error code returned by pvr_page_table_l2_init(). + */ +struct pvr_mmu_context *pvr_mmu_context_create(struct pvr_device *pvr_dev) +{ + struct pvr_mmu_context *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + int err; + + if (!ctx) + return ERR_PTR(-ENOMEM); + + err = pvr_page_table_l2_init(&ctx->page_table_l2, pvr_dev); + if (err) + return ERR_PTR(err); + + ctx->pvr_dev = pvr_dev; + + return ctx; +} + +/** + * pvr_mmu_context_destroy() - Destroy an MMU context. + * @ctx: Target MMU context. + */ +void pvr_mmu_context_destroy(struct pvr_mmu_context *ctx) +{ + pvr_page_table_l2_fini(&ctx->page_table_l2); + kfree(ctx); +} + +/** + * pvr_mmu_get_root_table_dma_addr() - Get the DMA address of the root of the + * page table structure behind a VM context. + * @root: Target MMU page table root. + */ +dma_addr_t pvr_mmu_get_root_table_dma_addr(struct pvr_mmu_context *ctx) +{ + return ctx->page_table_l2.backing_page.dma_addr; +} + +/** + * pvr_page_table_l1_alloc() - Allocate a l1 page_table object. + * @ctx: MMU context of owning VM context. + * + * Returns: + * * Newly created page table object on success, or + * * -%ENOMEM if no memory is available, + * * Any error code returned by pvr_page_table_l1_init(). + */ +static struct pvr_page_table_l1 * +pvr_page_table_l1_alloc(struct pvr_mmu_context *ctx) +{ + int err; + + struct pvr_page_table_l1 *table = + kzalloc(sizeof(*table), GFP_KERNEL); + + if (!table) + return ERR_PTR(-ENOMEM); + + err = pvr_page_table_l1_init(table, ctx->pvr_dev); + if (err) { + kfree(table); + return ERR_PTR(err); + } + + return table; +} + +/** + * pvr_page_table_l0_alloc() - Allocate a l0 page_table object. + * @ctx: MMU context of owning VM context. + * + * Returns: + * * Newly created page table object on success, or + * * -%ENOMEM if no memory is available, + * * Any error code returned by pvr_page_table_l0_init(). + */ +static struct pvr_page_table_l0 * +pvr_page_table_l0_alloc(struct pvr_mmu_context *ctx) +{ + int err; + + struct pvr_page_table_l0 *table = + kzalloc(sizeof(*table), GFP_KERNEL); + + if (!table) + return ERR_PTR(-ENOMEM); + + err = pvr_page_table_l0_init(table, ctx->pvr_dev); + if (err) { + kfree(table); + return ERR_PTR(err); + } + + return table; +} + +/** + * pvr_mmu_op_context_require_sync() - Mark an MMU op context as requiring a + * sync operation for the referenced page tables up to a specified level. + * @op_ctx: Target MMU op context. + * @level: Maximum page table level for which a sync is required. + */ +static void +pvr_mmu_op_context_require_sync(struct pvr_mmu_op_context *op_ctx, + enum pvr_mmu_sync_level level) +{ + if (op_ctx->sync_level_required < level) + op_ctx->sync_level_required = level; +} + +/** + * pvr_mmu_op_context_sync_manual() - Trigger a sync of some or all of the + * page tables referenced by a MMU op context. + * @op_ctx: Target MMU op context. + * @level: Maximum page table level to sync. + * + * Do not call this function directly. Instead use + * pvr_mmu_op_context_sync_partial() which is checked against the current + * value of &op_ctx->sync_level_required as set by + * pvr_mmu_op_context_require_sync(). + */ +static void +pvr_mmu_op_context_sync_manual(struct pvr_mmu_op_context *op_ctx, + enum pvr_mmu_sync_level level) +{ + /* + * We sync the page table levels in ascending order (starting from the + * leaf node) to ensure consistency. + */ + + WARN_ON(level < PVR_MMU_SYNC_LEVEL_NONE); + + if (level <= PVR_MMU_SYNC_LEVEL_NONE) + return; + + if (op_ctx->curr_page.l0_table) + pvr_page_table_l0_sync(op_ctx->curr_page.l0_table); + + if (level < PVR_MMU_SYNC_LEVEL_1) + return; + + if (op_ctx->curr_page.l1_table) + pvr_page_table_l1_sync(op_ctx->curr_page.l1_table); + + if (level < PVR_MMU_SYNC_LEVEL_2) + return; + + pvr_page_table_l2_sync(&op_ctx->mmu_ctx->page_table_l2); +} + +/** + * pvr_mmu_op_context_sync_partial() - Trigger a sync of some or all of the + * page tables referenced by a MMU op context. + * @op_ctx: Target MMU op context. + * @level: Requested page table level to sync up to (inclusive). + * + * If @level is greater than the maximum level recorded by @op_ctx as requiring + * a sync operation, only the previously recorded maximum will be used. + * + * Additionally, if @level is greater than or equal to the maximum level + * recorded by @op_ctx as requiring a sync operation, that maximum level will be + * reset as a full sync will be performed. This is equivalent to calling + * pvr_mmu_op_context_sync(). + */ +static void +pvr_mmu_op_context_sync_partial(struct pvr_mmu_op_context *op_ctx, + enum pvr_mmu_sync_level level) +{ + /* + * If the requested sync level is greater than or equal to the + * currently required sync level, we do two things: + * * Don't waste time syncing levels we haven't previously marked as + * requiring a sync, and + * * Reset the required sync level since we are about to sync + * everything that was previously marked as requiring a sync. + */ + if (level >= op_ctx->sync_level_required) { + level = op_ctx->sync_level_required; + op_ctx->sync_level_required = PVR_MMU_SYNC_LEVEL_NONE; + } + + pvr_mmu_op_context_sync_manual(op_ctx, level); +} + +/** + * pvr_mmu_op_context_sync() - Trigger a sync of every page table referenced by + * a MMU op context. + * @op_ctx: Target MMU op context. + * + * The maximum level marked internally as requiring a sync will be reset so + * that subsequent calls to this function will be no-ops unless @op_ctx is + * otherwise updated. + */ +static void +pvr_mmu_op_context_sync(struct pvr_mmu_op_context *op_ctx) +{ + pvr_mmu_op_context_sync_manual(op_ctx, op_ctx->sync_level_required); + + op_ctx->sync_level_required = PVR_MMU_SYNC_LEVEL_NONE; +} + +/** + * pvr_mmu_op_context_load_tables() - Load pointers to tables in each level of + * the page table tree structure needed to reference the physical page + * referenced by a MMU op context. + * @op_ctx: Target MMU op context. + * @should_create: Specifies whether new page tables should be created when + * empty page table entries are encountered during traversal. + * @load_level_required: Maximum page table level to load. + * + * If @should_create is %true, this function may modify the stored required + * sync level of @op_ctx as new page tables are created and inserted into their + * respective parents. + * + * Since there is only one root page table, it is technically incorrect to call + * this function with a value of @load_level_required greater than or equal to + * the root level number. However, this is not explicitly disallowed here. + * + * Return: + * * 0 on success, + * * Any error returned by pvr_page_table_l1_get_or_create() if + * @load_level_required >= 1 except -%ENXIO, or + * * Any error returned by pvr_page_table_l0_get_or_create() if + * @load_level_required >= 0 except -%ENXIO. + */ +static int +pvr_mmu_op_context_load_tables(struct pvr_mmu_op_context *op_ctx, + bool should_create, + enum pvr_mmu_sync_level load_level_required) +{ + const struct pvr_page_table_l1 *l1_head_before = + op_ctx->map.l1_prealloc_tables; + const struct pvr_page_table_l0 *l0_head_before = + op_ctx->map.l0_prealloc_tables; + int err; + + /* Clear tables we're about to fetch in case of error states. */ + if (load_level_required >= PVR_MMU_SYNC_LEVEL_1) + op_ctx->curr_page.l1_table = NULL; + + if (load_level_required >= PVR_MMU_SYNC_LEVEL_0) + op_ctx->curr_page.l0_table = NULL; + + /* Get or create L1 page table. */ + if (load_level_required >= PVR_MMU_SYNC_LEVEL_1) { + err = pvr_page_table_l1_get_or_insert(op_ctx, should_create); + if (err) { + /* + * If @should_create is %false and no L1 page table was + * found, return early but without an error. Since + * pvr_page_table_l1_get_or_create() can only return + * -%ENXIO if @should_create is %false, there is no + * need to check it here. + */ + if (err == -ENXIO) + err = 0; + + return err; + } + } + + /* Get or create L0 page table. */ + if (load_level_required >= PVR_MMU_SYNC_LEVEL_0) { + err = pvr_page_table_l0_get_or_insert(op_ctx, should_create); + if (err) { + /* + * If @should_create is %false and no L0 page table was + * found, return early but without an error. Since + * pvr_page_table_l0_get_or_insert() can only return + * -%ENXIO if @should_create is %false, there is no + * need to check it here. + */ + if (err == -ENXIO) + err = 0; + + /* + * At this point, an L1 page table could have been + * inserted but is now empty due to the failed attempt + * at inserting an L0 page table. In this instance, we + * must remove the empty L1 page table ourselves as + * pvr_page_table_l1_remove() is never called as part + * of the error path in + * pvr_page_table_l0_get_or_insert(). + */ + if (l1_head_before != op_ctx->map.l1_prealloc_tables) { + pvr_page_table_l2_remove(op_ctx); + pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_2); + } + + return err; + } + } + + /* + * A sync is only needed if table objects were inserted. This can be + * inferred by checking if the pointer at the head of the linked list + * has changed. + */ + if (l1_head_before != op_ctx->map.l1_prealloc_tables) + pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_2); + else if (l0_head_before != op_ctx->map.l0_prealloc_tables) + pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_1); + + return 0; +} + +/** + * pvr_mmu_op_context_set_curr_page() - Reassign the current page of an MMU op + * context, syncing any page tables previously assigned to it which are no + * longer relevant. + * @op_ctx: Target MMU op context. + * @device_addr: New pointer target. + * @should_create: Specify whether new page tables should be created when + * empty page table entries are encountered during traversal. + * + * This function performs a full sync on the pointer, regardless of which + * levels are modified. + * + * Return: + * * 0 on success, or + * * Any error returned by pvr_mmu_op_context_load_tables(). + */ +static int +pvr_mmu_op_context_set_curr_page(struct pvr_mmu_op_context *op_ctx, + u64 device_addr, bool should_create) +{ + pvr_mmu_op_context_sync(op_ctx); + + op_ctx->curr_page.l2_idx = pvr_page_table_l2_idx(device_addr); + op_ctx->curr_page.l1_idx = pvr_page_table_l1_idx(device_addr); + op_ctx->curr_page.l0_idx = pvr_page_table_l0_idx(device_addr); + op_ctx->curr_page.l1_table = NULL; + op_ctx->curr_page.l0_table = NULL; + + return pvr_mmu_op_context_load_tables(op_ctx, should_create, + PVR_MMU_SYNC_LEVEL_1); +} + +/** + * pvr_mmu_op_context_next_page() - Advance the current page of an MMU op + * context. + * @op_ctx: Target MMU op context. + * @should_create: Specify whether new page tables should be created when + * empty page table entries are encountered during traversal. + * + * If @should_create is %false, it is the caller's responsibility to verify that + * the state of the table references in @op_ctx is valid on return. If -%ENXIO + * is returned, at least one of the table references is invalid. It should be + * noted that @op_ctx as a whole will be left in a valid state if -%ENXIO is + * returned, unlike other error codes. The caller should check which references + * are invalid by comparing them to %NULL. Only &@ptr->l2_table is guaranteed + * to be valid, since it represents the root of the page table tree structure. + * + * Return: + * * 0 on success, + * * -%EPERM if the operation would wrap at the top of the page table + * hierarchy, + * * -%ENXIO if @should_create is %false and a page table of any level would + * have otherwise been created, or + * * Any error returned while attempting to create missing page tables if + * @should_create is %true. + */ +static int +pvr_mmu_op_context_next_page(struct pvr_mmu_op_context *op_ctx, + bool should_create) +{ + s8 load_level_required = PVR_MMU_SYNC_LEVEL_NONE; + + if (++op_ctx->curr_page.l0_idx != ROGUE_MMUCTRL_ENTRIES_PT_VALUE_X) + goto load_tables; + + op_ctx->curr_page.l0_idx = 0; + load_level_required = PVR_MMU_SYNC_LEVEL_0; + + if (++op_ctx->curr_page.l1_idx != ROGUE_MMUCTRL_ENTRIES_PD_VALUE) + goto load_tables; + + op_ctx->curr_page.l1_idx = 0; + load_level_required = PVR_MMU_SYNC_LEVEL_1; + + if (++op_ctx->curr_page.l2_idx != ROGUE_MMUCTRL_ENTRIES_PC_VALUE) + goto load_tables; + + /* + * If the pattern continued, we would set &op_ctx->curr_page.l2_idx to + * zero here. However, that would wrap the top layer of the page table + * hierarchy which is not a valid operation. Instead, we warn and return + * an error. + */ + WARN(true, + "%s(%p) attempted to loop the top of the page table hierarchy", + __func__, op_ctx); + return -EPERM; + + /* If indices have wrapped, we need to load new tables. */ +load_tables: + /* First, flush tables which will be unloaded. */ + pvr_mmu_op_context_sync_partial(op_ctx, load_level_required); + + /* Then load tables from the required level down. */ + return pvr_mmu_op_context_load_tables(op_ctx, should_create, + load_level_required); +} + +/** + * DOC: Single page operations + */ + +/** + * pvr_page_create() - Create a device-virtual memory page and insert it into + * a level 0 page table. + * @op_ctx: Target MMU op context pointing at the device-virtual address of the + * target page. + * @dma_addr: DMA address of the physical page backing the created page. + * @flags: Page options saved on the level 0 page table entry for reading by + * the device. + * + * Return: + * * 0 on success, or + * * -%EEXIST if the requested page already exists. + */ +static int +pvr_page_create(struct pvr_mmu_op_context *op_ctx, dma_addr_t dma_addr, + struct pvr_page_flags_raw flags) +{ + /* Do not create a new page if one already exists. */ + if (pvr_page_table_l0_entry_is_valid(op_ctx->curr_page.l0_table, + op_ctx->curr_page.l0_idx)) { + return -EEXIST; + } + + pvr_page_table_l0_insert(op_ctx, dma_addr, flags); + + pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_0); + + return 0; +} + +/** + * pvr_page_destroy() - Destroy a device page after removing it from its + * parent level 0 page table. + * @op_ctx: Target MMU op context. + * @ptr: Page table pointer to the device-virtual address of the target page. + */ +static void +pvr_page_destroy(struct pvr_mmu_op_context *op_ctx) +{ + /* Do nothing if the page does not exist. */ + if (!pvr_page_table_l0_entry_is_valid(op_ctx->curr_page.l0_table, + op_ctx->curr_page.l0_idx)) { + return; + } + + /* Clear the parent L0 page table entry. */ + pvr_page_table_l0_remove(op_ctx); + + pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_0); +} + +/** + * pvr_mmu_op_context_destroy() - Destroy an MMU op context. + * @op_ctx: Target MMU op context. + */ +void pvr_mmu_op_context_destroy(struct pvr_mmu_op_context *op_ctx) +{ + const bool flush_caches = + op_ctx->sync_level_required != PVR_MMU_SYNC_LEVEL_NONE; + + pvr_mmu_op_context_sync(op_ctx); + + if (flush_caches) + WARN_ON(pvr_mmu_flush(op_ctx->mmu_ctx->pvr_dev)); + + while (op_ctx->map.l0_prealloc_tables) { + struct pvr_page_table_l0 *tmp = op_ctx->map.l0_prealloc_tables; + + op_ctx->map.l0_prealloc_tables = + op_ctx->map.l0_prealloc_tables->next_free; + pvr_page_table_l0_free(tmp); + } + + while (op_ctx->map.l1_prealloc_tables) { + struct pvr_page_table_l1 *tmp = op_ctx->map.l1_prealloc_tables; + + op_ctx->map.l1_prealloc_tables = + op_ctx->map.l1_prealloc_tables->next_free; + pvr_page_table_l1_free(tmp); + } + + while (op_ctx->unmap.l0_free_tables) { + struct pvr_page_table_l0 *tmp = op_ctx->unmap.l0_free_tables; + + op_ctx->unmap.l0_free_tables = + op_ctx->unmap.l0_free_tables->next_free; + pvr_page_table_l0_free(tmp); + } + + while (op_ctx->unmap.l1_free_tables) { + struct pvr_page_table_l1 *tmp = op_ctx->unmap.l1_free_tables; + + op_ctx->unmap.l1_free_tables = + op_ctx->unmap.l1_free_tables->next_free; + pvr_page_table_l1_free(tmp); + } + + kfree(op_ctx); +} + +/** + * pvr_mmu_op_context_create() - Create an MMU op context. + * @ctx: MMU context associated with owning VM context. + * @sgt: Scatter gather table containing pages pinned for use by this context. + * @sgt_offset: Start offset of the requested device-virtual memory mapping. + * @size: Size in bytes of the requested device-virtual memory mapping. For an + * unmapping, this should be zero so that no page tables are allocated. + * + * Returns: + * * Newly created MMU op context object on success, or + * * -%ENOMEM if no memory is available, + * * Any error code returned by pvr_page_table_l2_init(). + */ +struct pvr_mmu_op_context * +pvr_mmu_op_context_create(struct pvr_mmu_context *ctx, struct sg_table *sgt, + u64 sgt_offset, u64 size) +{ + int err; + + struct pvr_mmu_op_context *op_ctx = + kzalloc(sizeof(*op_ctx), GFP_KERNEL); + + if (!op_ctx) + return ERR_PTR(-ENOMEM); + + op_ctx->mmu_ctx = ctx; + op_ctx->map.sgt = sgt; + op_ctx->map.sgt_offset = sgt_offset; + op_ctx->sync_level_required = PVR_MMU_SYNC_LEVEL_NONE; + + if (size) { + /* + * The number of page table objects we need to prealloc is + * indicated by the mapping size, start offset and the sizes + * of the areas mapped per PT or PD. The range calculation is + * identical to that for the index into a table for a device + * address, so we reuse those functions here. + */ + const u32 l1_start_idx = pvr_page_table_l2_idx(sgt_offset); + const u32 l1_end_idx = pvr_page_table_l2_idx(sgt_offset + size); + const u32 l1_count = l1_end_idx - l1_start_idx + 1; + const u32 l0_start_idx = pvr_page_table_l1_idx(sgt_offset); + const u32 l0_end_idx = pvr_page_table_l1_idx(sgt_offset + size); + const u32 l0_count = l0_end_idx - l0_start_idx + 1; + + /* + * Alloc and push page table entries until we have enough of + * each type, ending with linked lists of l0 and l1 entries in + * reverse order. + */ + for (int i = 0; i < l1_count; i++) { + struct pvr_page_table_l1 *l1_tmp = + pvr_page_table_l1_alloc(ctx); + + err = PTR_ERR_OR_ZERO(l1_tmp); + if (err) + goto err_cleanup; + + l1_tmp->next_free = op_ctx->map.l1_prealloc_tables; + op_ctx->map.l1_prealloc_tables = l1_tmp; + } + + for (int i = 0; i < l0_count; i++) { + struct pvr_page_table_l0 *l0_tmp = + pvr_page_table_l0_alloc(ctx); + + err = PTR_ERR_OR_ZERO(l0_tmp); + if (err) + goto err_cleanup; + + l0_tmp->next_free = op_ctx->map.l0_prealloc_tables; + op_ctx->map.l0_prealloc_tables = l0_tmp; + } + } + + return op_ctx; + +err_cleanup: + pvr_mmu_op_context_destroy(op_ctx); + + return ERR_PTR(err); +} + +/** + * pvr_mmu_op_context_unmap_curr_page() - Unmap pages from a memory context + * starting from the current page of an MMU op context. + * @op_ctx: Target MMU op context pointing at the first page to unmap. + * @nr_pages: Number of pages to unmap. + * + * Return: + * * 0 on success, or + * * Any error encountered while advancing @op_ctx.curr_page with + * pvr_mmu_op_context_next_page() (except -%ENXIO). + */ +static int +pvr_mmu_op_context_unmap_curr_page(struct pvr_mmu_op_context *op_ctx, + u64 nr_pages) +{ + int err; + + if (nr_pages == 0) + return 0; + + /* + * Destroy first page outside loop, as it doesn't require a page + * advance beforehand. If the L0 page table reference in + * @op_ctx.curr_page is %NULL, there cannot be a mapped page at + * @op_ctx.curr_page (so skip ahead). + */ + if (op_ctx->curr_page.l0_table) + pvr_page_destroy(op_ctx); + + for (u64 page = 1; page < nr_pages; ++page) { + err = pvr_mmu_op_context_next_page(op_ctx, false); + /* + * If the page table tree structure at @op_ctx.curr_page is + * incomplete, skip ahead. We don't care about unmapping pages + * that cannot exist. + * + * FIXME: This could be made more efficient by jumping ahead + * using pvr_mmu_op_context_set_curr_page(). + */ + if (err == -ENXIO) + continue; + else if (err) + return err; + + pvr_page_destroy(op_ctx); + } + + return 0; +} + +/** + * pvr_mmu_unmap() - Unmap pages from a memory context. + * @op_ctx: Target MMU op context. + * @device_addr: First device-virtual address to unmap. + * @size: Size in bytes to unmap. + * + * The total amount of device-virtual memory unmapped is + * @nr_pages * %PVR_DEVICE_PAGE_SIZE. + * + * Returns: + * * 0 on success, or + * * Any error code returned by pvr_page_table_ptr_init(), or + * * Any error code returned by pvr_page_table_ptr_unmap(). + */ +int pvr_mmu_unmap(struct pvr_mmu_op_context *op_ctx, u64 device_addr, u64 size) +{ + int err = pvr_mmu_op_context_set_curr_page(op_ctx, device_addr, false); + + if (err) + return err; + + return pvr_mmu_op_context_unmap_curr_page(op_ctx, + size >> PVR_DEVICE_PAGE_SHIFT); +} + +/** + * pvr_mmu_map_sgl() - Map part of a scatter-gather table entry to + * device-virtual memory. + * @op_ctx: Target MMU op context pointing to the first page that should be + * mapped. + * @sgl: Target scatter-gather table entry. + * @offset: Offset into @sgl to map from. Must result in a starting address + * from @sgl which is CPU page-aligned. + * @size: Size of the memory to be mapped in bytes. Must be a non-zero multiple + * of the device page size. + * @page_flags: Page options to be applied to every device-virtual memory page + * in the created mapping. + * + * Return: + * * 0 on success, + * * -%EINVAL if the range specified by @offset and @size is not completely + * within @sgl, or + * * Any error encountered while creating a page with pvr_page_create(), or + * * Any error encountered while advancing @op_ctx.curr_page with + * pvr_mmu_op_context_next_page(). + */ +static int +pvr_mmu_map_sgl(struct pvr_mmu_op_context *op_ctx, struct scatterlist *sgl, + u64 offset, u64 size, struct pvr_page_flags_raw page_flags) +{ + const unsigned int pages = size >> PVR_DEVICE_PAGE_SHIFT; + dma_addr_t dma_addr = sg_dma_address(sgl) + offset; + const unsigned int dma_len = sg_dma_len(sgl); + struct pvr_page_table_ptr ptr_copy; + unsigned int page; + int err; + + if (size > dma_len || offset > dma_len - size) + return -EINVAL; + + /* + * Before progressing, save a copy of the start pointer so we can use + * it again if we enter an error state and have to destroy pages. + */ + memcpy(&ptr_copy, &op_ctx->curr_page, sizeof(ptr_copy)); + + /* + * Create first page outside loop, as it doesn't require a page advance + * beforehand. + */ + err = pvr_page_create(op_ctx, dma_addr, page_flags); + if (err) + return err; + + for (page = 1; page < pages; ++page) { + err = pvr_mmu_op_context_next_page(op_ctx, true); + if (err) + goto err_destroy_pages; + + dma_addr += PVR_DEVICE_PAGE_SIZE; + + err = pvr_page_create(op_ctx, dma_addr, page_flags); + if (err) + goto err_destroy_pages; + } + + return 0; + +err_destroy_pages: + memcpy(&op_ctx->curr_page, &ptr_copy, sizeof(op_ctx->curr_page)); + err = pvr_mmu_op_context_unmap_curr_page(op_ctx, page); + + return err; +} + +/** + * pvr_mmu_map() - Map an object's virtual memory to physical memory. + * @op_ctx: Target MMU op context. + * @size: Size of memory to be mapped in bytes. Must be a non-zero multiple + * of the device page size. + * @flags: Flags from pvr_gem_object associated with the mapping. + * @device_addr: Virtual device address to map to. Must be device page-aligned. + * + * Returns: + * * 0 on success, or + * * Any error code returned by pvr_page_table_ptr_init(), or + * * Any error code returned by pvr_mmu_map_sgl(), or + * * Any error code returned by pvr_page_table_ptr_next_page(). + */ +int pvr_mmu_map(struct pvr_mmu_op_context *op_ctx, u64 size, u64 flags, + u64 device_addr) +{ + struct pvr_page_table_ptr ptr_copy; + struct pvr_page_flags_raw flags_raw; + struct scatterlist *sgl; + u64 mapped_size = 0; + unsigned int count; + int err; + + if (!size) + return 0; + + if ((op_ctx->map.sgt_offset | size) & ~PVR_DEVICE_PAGE_MASK) + return -EINVAL; + + err = pvr_mmu_op_context_set_curr_page(op_ctx, device_addr, true); + if (err) + return -EINVAL; + + memcpy(&ptr_copy, &op_ctx->curr_page, sizeof(ptr_copy)); + + flags_raw = pvr_page_flags_raw_create(false, false, + flags & DRM_PVR_BO_DEVICE_BYPASS_CACHE, + flags & DRM_PVR_BO_DEVICE_PM_FW_PROTECT); + + /* Map scatter gather table */ + for_each_sgtable_dma_sg(op_ctx->map.sgt, sgl, count) { + const size_t sgl_len = sg_dma_len(sgl); + u64 sgl_offset, map_sgl_len; + + if (sgl_len <= op_ctx->map.sgt_offset) { + op_ctx->map.sgt_offset -= sgl_len; + continue; + } + + sgl_offset = op_ctx->map.sgt_offset; + map_sgl_len = min_t(u64, sgl_len - sgl_offset, size - mapped_size); + + err = pvr_mmu_map_sgl(op_ctx, sgl, sgl_offset, map_sgl_len, + flags_raw); + if (err) + break; + + /* + * Flag the L0 page table as requiring a flush when the MMU op + * context is destroyed. + */ + pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_0); + + op_ctx->map.sgt_offset = 0; + mapped_size += map_sgl_len; + + if (mapped_size >= size) + break; + + err = pvr_mmu_op_context_next_page(op_ctx, true); + if (err) + break; + } + + if (err && mapped_size) { + memcpy(&op_ctx->curr_page, &ptr_copy, sizeof(op_ctx->curr_page)); + pvr_mmu_op_context_unmap_curr_page(op_ctx, + mapped_size >> PVR_DEVICE_PAGE_SHIFT); + } + + return err; +} diff --git a/drivers/gpu/drm/imagination/pvr_mmu.h b/drivers/gpu/drm/imagination/pvr_mmu.h new file mode 100644 index 000000000000..bf93c5ffc86a --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_mmu.h @@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#ifndef PVR_MMU_H +#define PVR_MMU_H + +#include <linux/memory.h> +#include <linux/types.h> + +/* Forward declaration from "pvr_device.h" */ +struct pvr_device; + +/* Forward declaration from "pvr_mmu.c" */ +struct pvr_mmu_context; +struct pvr_mmu_op_context; + +/* Forward declaration from "pvr_vm.c" */ +struct pvr_vm_context; + +/* Forward declaration from <linux/scatterlist.h> */ +struct sg_table; + +/** + * DOC: Public API (constants) + * + * .. c:macro:: PVR_DEVICE_PAGE_SIZE + * + * Fixed page size referenced by leaf nodes in the page table tree + * structure. In the current implementation, this value is pegged to the + * CPU page size (%PAGE_SIZE). It is therefore an error to specify a CPU + * page size which is not also a supported device page size. The supported + * device page sizes are: 4KiB, 16KiB, 64KiB, 256KiB, 1MiB and 2MiB. + * + * .. c:macro:: PVR_DEVICE_PAGE_SHIFT + * + * Shift value used to efficiently multiply or divide by + * %PVR_DEVICE_PAGE_SIZE. + * + * This value is derived from %PVR_DEVICE_PAGE_SIZE. + * + * .. c:macro:: PVR_DEVICE_PAGE_MASK + * + * Mask used to round a value down to the nearest multiple of + * %PVR_DEVICE_PAGE_SIZE. When bitwise negated, it will indicate whether a + * value is already a multiple of %PVR_DEVICE_PAGE_SIZE. + * + * This value is derived from %PVR_DEVICE_PAGE_SIZE. + */ + +/* PVR_DEVICE_PAGE_SIZE determines the page size */ +#define PVR_DEVICE_PAGE_SIZE (PAGE_SIZE) +#define PVR_DEVICE_PAGE_SHIFT (PAGE_SHIFT) +#define PVR_DEVICE_PAGE_MASK (PAGE_MASK) + +/** + * DOC: Page table index utilities (constants) + * + * .. c:macro:: PVR_PAGE_TABLE_ADDR_SPACE_SIZE + * + * Size of device-virtual address space which can be represented in the page + * table structure. + * + * This value is checked at runtime against + * &pvr_device_features.virtual_address_space_bits by + * pvr_vm_create_context(), which will return an error if the feature value + * does not match this constant. + * + * .. admonition:: Future work + * + * It should be possible to support other values of + * &pvr_device_features.virtual_address_space_bits, but so far no + * hardware has been created which advertises an unsupported value. + * + * .. c:macro:: PVR_PAGE_TABLE_ADDR_BITS + * + * Number of bits needed to represent any value less than + * %PVR_PAGE_TABLE_ADDR_SPACE_SIZE exactly. + * + * .. c:macro:: PVR_PAGE_TABLE_ADDR_MASK + * + * Bitmask of device-virtual addresses which are valid in the page table + * structure. + * + * This value is derived from %PVR_PAGE_TABLE_ADDR_SPACE_SIZE, so the same + * notes on that constant apply here. + */ +#define PVR_PAGE_TABLE_ADDR_SPACE_SIZE SZ_1T +#define PVR_PAGE_TABLE_ADDR_BITS __ffs(PVR_PAGE_TABLE_ADDR_SPACE_SIZE) +#define PVR_PAGE_TABLE_ADDR_MASK (PVR_PAGE_TABLE_ADDR_SPACE_SIZE - 1) + +int pvr_mmu_flush(struct pvr_device *pvr_dev); + +struct pvr_mmu_context *pvr_mmu_context_create(struct pvr_device *pvr_dev); +void pvr_mmu_context_destroy(struct pvr_mmu_context *ctx); + +dma_addr_t pvr_mmu_get_root_table_dma_addr(struct pvr_mmu_context *ctx); + +void pvr_mmu_op_context_destroy(struct pvr_mmu_op_context *op_ctx); +struct pvr_mmu_op_context * +pvr_mmu_op_context_create(struct pvr_mmu_context *ctx, + struct sg_table *sgt, u64 sgt_offset, u64 size); + +int pvr_mmu_map(struct pvr_mmu_op_context *op_ctx, u64 size, u64 flags, + u64 device_addr); +int pvr_mmu_unmap(struct pvr_mmu_op_context *op_ctx, u64 device_addr, u64 size); + +#endif /* PVR_MMU_H */ + diff --git a/drivers/gpu/drm/imagination/pvr_vm.c b/drivers/gpu/drm/imagination/pvr_vm.c new file mode 100644 index 000000000000..ec5ad49d3bdd --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_vm.c @@ -0,0 +1,947 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#include "pvr_vm.h" + +#include "pvr_device.h" +#include "pvr_drv.h" +#include "pvr_gem.h" +#include "pvr_mmu.h" +#include "pvr_rogue_fwif.h" +#include "pvr_rogue_heap_config.h" + +#include <drm/drm_gem.h> +#include <drm/drm_gpuvm.h> + +#include <linux/container_of.h> +#include <linux/err.h> +#include <linux/errno.h> +#include <linux/gfp_types.h> +#include <linux/kref.h> +#include <linux/mutex.h> +#include <linux/stddef.h> + +/** + * DOC: Memory context + * + * This is the "top level" datatype in the VM code. It's exposed in the public + * API as an opaque handle. + */ + +/** + * struct pvr_vm_context - Context type which encapsulates an entire page table + * tree structure. + * @pvr_dev: The PowerVR device to which this context is bound. + * + * This binding is immutable for the life of the context. + * @mmu_ctx: The context for binding to physical memory. + * @gpuvm_mgr: GPUVM object associated with this context. + * @lock: Global lock on this entire structure of page tables. + * @fw_mem_ctx_obj: Firmware object representing firmware memory context. + * @ref_count: Reference count of object. + */ +struct pvr_vm_context { + struct pvr_device *pvr_dev; + struct pvr_mmu_context *mmu_ctx; + struct drm_gpuvm gpuvm_mgr; + struct mutex lock; + struct pvr_fw_object *fw_mem_ctx_obj; + struct kref ref_count; +}; + +/** + * pvr_vm_get_page_table_root_addr() - Get the DMA address of the root of the + * page table structure behind a VM context. + * @vm_ctx: Target VM context. + */ +dma_addr_t pvr_vm_get_page_table_root_addr(struct pvr_vm_context *vm_ctx) +{ + return pvr_mmu_get_root_table_dma_addr(vm_ctx->mmu_ctx); +} + +/** + * DOC: Memory mappings + */ + +/** + * struct pvr_vm_gpuva - Wrapper type representing a single VM mapping. + */ +struct pvr_vm_gpuva { + /** @base: The wrapped drm_gpuva object. */ + struct drm_gpuva base; + + /** + * @node: Linked list node, to build a list of mappings to cleanup after + * unmapping, so that all associated &struct pvr_gem_object can be freed + * outside of callback context. + */ + struct list_head node; +}; + +static __always_inline +struct pvr_vm_gpuva *to_pvr_vm_gpuva(struct drm_gpuva *gpuva) +{ + return container_of(gpuva, struct pvr_vm_gpuva, base); +} + +/** + * pvr_vm_gpuva_mapping_init() - Setup a mapping object. + * @va: Pointer to pvr_vm_gpuva mapping object. + * + * The parameters of the mapping are handled internally by DRM GPUVA manager. + */ +static void +pvr_vm_gpuva_mapping_init(struct pvr_vm_gpuva *va) +{ + INIT_LIST_HEAD(&va->node); +} + +/** + * struct pvr_vm_gpuva_op_ctx - Context of a map/unmap operation. + */ +struct pvr_vm_gpuva_op_ctx { + /** @pvr_obj: Object associated with mapping (map only). */ + struct pvr_gem_object *pvr_obj; + + /** + * @vm_ctx: VM context where the mapping will be created or destroyed. + */ + struct pvr_vm_context *vm_ctx; + + /** @mmu_op_ctx: MMU op context. */ + struct pvr_mmu_op_context *mmu_op_ctx; + + /** + * @new_va: Prealloced VA mapping object (init in callback). + * Used when creating a mapping. + */ + struct pvr_vm_gpuva *new_va; + + /** + * @prev_va: Prealloced VA mapping object (init in callback). + * Used when a mapping or unmapping operation overlaps an existing + * mapping and splits away the beginning into a new mapping. + */ + struct pvr_vm_gpuva *prev_va; + + /** + * @next_va: Prealloced VA mapping object (init in callback). + * Used when a mapping or unmapping operation overlaps an existing + * mapping and splits away the end into a new mapping. + */ + struct pvr_vm_gpuva *next_va; + + /** + * @returned_gpuvas: When unlinking, this list holds now unused + * &struct pvr_vm_gpuva objects. These must be cleaned up when this + * object is cleaned up. + */ + struct list_head returned_gpuvas; +}; + +static void +pvr_vm_gpuva_link(struct pvr_vm_gpuva *va) +{ + struct pvr_gem_object *pvr_gem = gem_to_pvr_gem(va->base.gem.obj); + + mutex_lock(&pvr_gem->gpuva_lock); + drm_gpuva_link(&va->base); + mutex_unlock(&pvr_gem->gpuva_lock); +} + +static void +pvr_vm_gpuva_unlink(struct pvr_vm_gpuva_op_ctx *op, struct pvr_vm_gpuva *va) +{ + struct pvr_gem_object *pvr_gem = gem_to_pvr_gem(va->base.gem.obj); + + mutex_lock(&pvr_gem->gpuva_lock); + drm_gpuva_unlink(&va->base); + mutex_unlock(&pvr_gem->gpuva_lock); + + list_move_tail(&va->node, &op->returned_gpuvas); +} + +/** + * pvr_vm_gpuva_map() - Insert a mapping into a memory context. + * @op: gpuva op containing the remap details. + * @op_ctx: Operation context. + * + * Context: Called by drm_gpuvm_sm_map following a successful mapping while + * @op_ctx.vm_ctx mutex is held. + * + * Return: + * * 0 on success, or + * * Any error returned by pvr_mmu_map(). + */ +static int +pvr_vm_gpuva_map(struct drm_gpuva_op *op, void *op_ctx) +{ + struct pvr_gem_object *pvr_gem = gem_to_pvr_gem(op->map.gem.obj); + struct pvr_vm_gpuva_op_ctx *ctx = op_ctx; + int err; + + if ((op->map.gem.offset | op->map.va.range) & ~PVR_DEVICE_PAGE_MASK) + return -EINVAL; + + err = pvr_mmu_map(ctx->mmu_op_ctx, op->map.va.range, pvr_gem->flags, + op->map.va.addr); + if (err) + return err; + + drm_gpuva_map(&ctx->vm_ctx->gpuvm_mgr, &ctx->new_va->base, &op->map); + pvr_vm_gpuva_link(ctx->new_va); + ctx->new_va = NULL; + + /* + * Setting the pvr_obj pointer to NULL in the pvr_vm_bind_op means that + * the reference won't be released when it is cleaned up. + */ + ctx->pvr_obj = NULL; + + return 0; +} + +/** + * pvr_vm_gpuva_unmap() - Remove a mapping from a memory context. + * @op: gpuva op containing the unmap details. + * @op_ctx: Operation context. + * + * Context: Called by drm_gpuvm_sm_unmap following a successful unmapping while + * @op_ctx.vm_ctx mutex is held. + * + * Return: + * * 0 on success, or + * * Any error returned by pvr_mmu_unmap(). + */ +static int +pvr_vm_gpuva_unmap(struct drm_gpuva_op *op, void *op_ctx) +{ + struct pvr_vm_gpuva_op_ctx *ctx = op_ctx; + + int err = pvr_mmu_unmap(ctx->mmu_op_ctx, op->unmap.va->va.addr, + op->unmap.va->va.range); + + if (err) + return err; + + drm_gpuva_unmap(&op->unmap); + pvr_vm_gpuva_unlink(ctx, to_pvr_vm_gpuva(op->unmap.va)); + + return 0; +} + +/** + * pvr_vm_gpuva_remap() - Remap a mapping within a memory context. + * @op: gpuva op containing the remap details. + * @op_ctx: Operation context. + * + * Context: Called by either drm_gpuvm_sm_map or drm_gpuvm_sm_unmap when a + * mapping or unmapping operation causes a region to be split. The + * @op_ctx.vm_ctx mutex is held. + * + * Return: + * * 0 on success, or + * * Any error returned by pvr_vm_gpuva_unmap() or pvr_vm_gpuva_unmap(). + */ +static int +pvr_vm_gpuva_remap(struct drm_gpuva_op *op, void *op_ctx) +{ + struct pvr_vm_gpuva_op_ctx *ctx = op_ctx; + u64 va_start = 0, va_range = 0; + int err; + + drm_gpuva_op_remap_get_unmap_range(&op->remap, &va_start, &va_range); + err = pvr_mmu_unmap(ctx->mmu_op_ctx, va_start, va_range); + if (err) + return err; + + /* No actual remap required: the page table tree depth is fixed to 3, + * and we use 4k page table entries only for now. + */ + drm_gpuva_remap(&ctx->prev_va->base, &ctx->next_va->base, &op->remap); + + if (op->remap.prev) { + pvr_gem_object_get(gem_to_pvr_gem(ctx->prev_va->base.gem.obj)); + pvr_vm_gpuva_link(ctx->prev_va); + ctx->prev_va = NULL; + } + + if (op->remap.next) { + pvr_gem_object_get(gem_to_pvr_gem(ctx->next_va->base.gem.obj)); + pvr_vm_gpuva_link(ctx->next_va); + ctx->next_va = NULL; + } + + pvr_vm_gpuva_unlink(ctx, to_pvr_vm_gpuva(op->remap.unmap->va)); + + return 0; +} + +/* + * Public API + * + * For an overview of these functions, see *DOC: Public API* in "pvr_vm.h". + */ + +/** + * pvr_device_addr_is_valid() - Tests whether a device-virtual address + * is valid. + * @device_addr: Virtual device address to test. + * + * Return: + * * %true if @device_addr is within the valid range for a device page + * table and is aligned to the device page size, or + * * %false otherwise. + */ +bool +pvr_device_addr_is_valid(u64 device_addr) +{ + return (device_addr & ~PVR_PAGE_TABLE_ADDR_MASK) == 0 && + (device_addr & ~PVR_DEVICE_PAGE_MASK) == 0; +} + +/** + * pvr_device_addr_and_size_are_valid() - Tests whether a device-virtual + * address and associated size are both valid. + * @device_addr: Virtual device address to test. + * @size: Size of the range based at @device_addr to test. + * + * Calling pvr_device_addr_is_valid() twice (once on @size, and again on + * @device_addr + @size) to verify a device-virtual address range initially + * seems intuitive, but it produces a false-negative when the address range + * is right at the end of device-virtual address space. + * + * This function catches that corner case, as well as checking that + * @size is non-zero. + * + * Return: + * * %true if @device_addr is device page aligned; @size is device page + * aligned; the range specified by @device_addr and @size is within the + * bounds of the device-virtual address space, and @size is non-zero, or + * * %false otherwise. + */ +bool +pvr_device_addr_and_size_are_valid(u64 device_addr, u64 size) +{ + return pvr_device_addr_is_valid(device_addr) && + size != 0 && (size & ~PVR_DEVICE_PAGE_MASK) == 0 && + (device_addr + size <= PVR_PAGE_TABLE_ADDR_SPACE_SIZE); +} + +static const struct drm_gpuvm_ops pvr_vm_gpuva_ops = { + .sm_step_map = pvr_vm_gpuva_map, + .sm_step_remap = pvr_vm_gpuva_remap, + .sm_step_unmap = pvr_vm_gpuva_unmap, +}; + +/** + * pvr_vm_create_context() - Create a new VM context. + * @pvr_dev: Target PowerVR device. + * @is_userspace_context: %true if this context is for userspace. This will + * create a firmware memory context for the VM context + * and disable warnings when tearing down mappings. + * + * Return: + * * A handle to the newly-minted VM context on success, + * * -%EINVAL if the feature "virtual address space bits" on @pvr_dev is + * missing or has an unsupported value, + * * -%ENOMEM if allocation of the structure behind the opaque handle fails, + * or + * * Any error encountered while setting up internal structures. + */ +struct pvr_vm_context * +pvr_vm_create_context(struct pvr_device *pvr_dev, bool is_userspace_context) +{ + struct drm_device *drm_dev = from_pvr_device(pvr_dev); + + struct pvr_vm_context *vm_ctx; + u16 device_addr_bits; + + int err; + + err = PVR_FEATURE_VALUE(pvr_dev, virtual_address_space_bits, + &device_addr_bits); + if (err) { + drm_err(drm_dev, + "Failed to get device virtual address space bits\n"); + return ERR_PTR(err); + } + + if (device_addr_bits != PVR_PAGE_TABLE_ADDR_BITS) { + drm_err(drm_dev, + "Device has unsupported virtual address space size\n"); + return ERR_PTR(-EINVAL); + } + + vm_ctx = kzalloc(sizeof(*vm_ctx), GFP_KERNEL); + if (!vm_ctx) + return ERR_PTR(-ENOMEM); + + vm_ctx->pvr_dev = pvr_dev; + kref_init(&vm_ctx->ref_count); + mutex_init(&vm_ctx->lock); + + drm_gpuvm_init(&vm_ctx->gpuvm_mgr, + is_userspace_context ? "PowerVR-user-VM" : "PowerVR-FW-VM", + 0, 1ULL << device_addr_bits, 0, 0, &pvr_vm_gpuva_ops); + + vm_ctx->mmu_ctx = pvr_mmu_context_create(pvr_dev); + err = PTR_ERR_OR_ZERO(&vm_ctx->mmu_ctx); + if (err) { + vm_ctx->mmu_ctx = NULL; + goto err_put_ctx; + } + + if (is_userspace_context) { + /* TODO: Create FW mem context */ + err = -ENODEV; + goto err_put_ctx; + } + + return vm_ctx; + +err_put_ctx: + pvr_vm_context_put(vm_ctx); + + return ERR_PTR(err); +} + +/** + * pvr_vm_context_release() - Teardown a VM context. + * @ref_count: Pointer to reference counter of the VM context. + * + * This function ensures that no mappings are left dangling by unmapping them + * all in order of ascending device-virtual address. + */ +static void +pvr_vm_context_release(struct kref *ref_count) +{ + struct pvr_vm_context *vm_ctx = + container_of(ref_count, struct pvr_vm_context, ref_count); + + /* TODO: Destroy FW mem context */ + WARN_ON(vm_ctx->fw_mem_ctx_obj); + + WARN_ON(pvr_vm_unmap(vm_ctx, vm_ctx->gpuvm_mgr.mm_start, + vm_ctx->gpuvm_mgr.mm_range)); + + drm_gpuvm_destroy(&vm_ctx->gpuvm_mgr); + pvr_mmu_context_destroy(vm_ctx->mmu_ctx); + mutex_destroy(&vm_ctx->lock); + + kfree(vm_ctx); +} + +/** + * pvr_vm_context_lookup() - Look up VM context from handle + * @pvr_file: Pointer to pvr_file structure. + * @handle: Object handle. + * + * Takes reference on VM context object. Call pvr_vm_context_put() to release. + * + * Returns: + * * The requested object on success, or + * * %NULL on failure (object does not exist in list, or is not a VM context) + */ +struct pvr_vm_context * +pvr_vm_context_lookup(struct pvr_file *pvr_file, u32 handle) +{ + struct pvr_vm_context *vm_ctx; + + xa_lock(&pvr_file->vm_ctx_handles); + vm_ctx = xa_load(&pvr_file->vm_ctx_handles, handle); + if (vm_ctx) + kref_get(&vm_ctx->ref_count); + + xa_unlock(&pvr_file->vm_ctx_handles); + + return vm_ctx; +} + +/** + * pvr_vm_context_put() - Release a reference on a VM context + * @vm_ctx: Target VM context. + * + * Returns: + * * %true if the VM context was destroyed, or + * * %false if there are any references still remaining. + */ +bool +pvr_vm_context_put(struct pvr_vm_context *vm_ctx) +{ + WARN_ON(!vm_ctx); + + if (vm_ctx) + return kref_put(&vm_ctx->ref_count, pvr_vm_context_release); + + return true; +} + +/** + * pvr_destroy_vm_contexts_for_file: Destroy any VM contexts associated with the + * given file. + * @pvr_file: Pointer to pvr_file structure. + * + * Removes all vm_contexts associated with @pvr_file from the device VM context + * list and drops initial references. vm_contexts will then be destroyed once + * all outstanding references are dropped. + */ +void pvr_destroy_vm_contexts_for_file(struct pvr_file *pvr_file) +{ + struct pvr_vm_context *vm_ctx; + unsigned long handle; + + xa_for_each(&pvr_file->vm_ctx_handles, handle, vm_ctx) { + /* vm_ctx is not used here because that would create a race with xa_erase */ + pvr_vm_context_put(xa_erase(&pvr_file->vm_ctx_handles, handle)); + } +} + +/** + * pvr_vm_map() - Map a section of physical memory into a section of device-virtual memory. + * @vm_ctx: Target VM context. + * @pvr_obj: Target PowerVR memory object. + * @pvr_obj_offset: Offset into @pvr_obj to map from. + * @device_addr: Virtual device address at the start of the requested mapping. + * @size: Size of the requested mapping. + * + * No handle is returned to represent the mapping. Instead, callers should + * remember @device_addr and use that as a handle. + * + * Return: + * * 0 on success, + * * -%EINVAL if @device_addr is not a valid page-aligned device-virtual + * address; the region specified by @pvr_obj_offset and @size does not fall + * entirely within @pvr_obj, or any part of the specified region of @pvr_obj + * is not device-virtual page-aligned, + * * Any error encountered while performing internal operations required to + * destroy the mapping (returned from pvr_vm_gpuva_map or + * pvr_vm_gpuva_remap). + */ +int +pvr_vm_map(struct pvr_vm_context *vm_ctx, + struct pvr_gem_object *pvr_obj, u64 pvr_obj_offset, + u64 device_addr, u64 size) +{ + const size_t pvr_obj_size = pvr_gem_object_size(pvr_obj); + struct pvr_vm_gpuva_op_ctx op_ctx = { .vm_ctx = vm_ctx }; + struct pvr_vm_gpuva *gpuva, *tmp_gpuva; + struct sg_table *sgt; + int err; + + if (!pvr_device_addr_and_size_are_valid(device_addr, size) || + pvr_obj_offset & ~PAGE_MASK || size & ~PAGE_MASK || + pvr_obj_offset + size > pvr_obj_size || + pvr_obj_offset > pvr_obj_size) { + return -EINVAL; + } + + INIT_LIST_HEAD(&op_ctx.returned_gpuvas); + + op_ctx.new_va = kzalloc(sizeof(*op_ctx.new_va), GFP_KERNEL); + op_ctx.prev_va = kzalloc(sizeof(*op_ctx.prev_va), GFP_KERNEL); + op_ctx.next_va = kzalloc(sizeof(*op_ctx.next_va), GFP_KERNEL); + if (!op_ctx.new_va || !op_ctx.prev_va || !op_ctx.next_va) { + err = -ENOMEM; + goto out_free; + } + + pvr_vm_gpuva_mapping_init(op_ctx.new_va); + pvr_vm_gpuva_mapping_init(op_ctx.prev_va); + pvr_vm_gpuva_mapping_init(op_ctx.next_va); + + sgt = pvr_gem_object_get_pages_sgt(pvr_obj); + err = PTR_ERR_OR_ZERO(sgt); + if (err) + goto out_free; + + op_ctx.mmu_op_ctx = pvr_mmu_op_context_create(vm_ctx->mmu_ctx, sgt, + pvr_obj_offset, size); + err = PTR_ERR_OR_ZERO(op_ctx.mmu_op_ctx); + if (err) { + op_ctx.mmu_op_ctx = NULL; + goto out_mmu_op_ctx_destroy; + } + + pvr_gem_object_get(pvr_obj); + op_ctx.pvr_obj = pvr_obj; + + mutex_lock(&vm_ctx->lock); + err = drm_gpuvm_sm_map(&vm_ctx->gpuvm_mgr, &op_ctx, device_addr, size, + gem_from_pvr_gem(pvr_obj), pvr_obj_offset); + mutex_unlock(&vm_ctx->lock); + + pvr_gem_object_put(op_ctx.pvr_obj); + +out_mmu_op_ctx_destroy: + pvr_mmu_op_context_destroy(op_ctx.mmu_op_ctx); + +out_free: + kfree(op_ctx.next_va); + kfree(op_ctx.prev_va); + kfree(op_ctx.new_va); + + list_for_each_entry_safe(gpuva, tmp_gpuva, &op_ctx.returned_gpuvas, + node) { + drm_gem_object_put(gpuva->base.gem.obj); + list_del(&gpuva->node); + kfree(gpuva); + } + + return err; +} + +/** + * pvr_vm_unmap() - Unmap an already mapped section of device-virtual memory. + * @vm_ctx: Target VM context. + * @device_addr: Virtual device address at the start of the target mapping. + * @size: Size of the target mapping. + * + * Return: + * * 0 on success, + * * -%EINVAL if @device_addr is not a valid page-aligned device-virtual + * address, + * * Any error encountered while performing internal operations required to + * destroy the mapping (returned from pvr_vm_gpuva_unmap or + * pvr_vm_gpuva_remap). + */ +int +pvr_vm_unmap(struct pvr_vm_context *vm_ctx, u64 device_addr, u64 size) +{ + struct pvr_vm_gpuva_op_ctx op_ctx = { .vm_ctx = vm_ctx }; + struct pvr_vm_gpuva *gpuva, *tmp_gpuva; + int err; + + if (!pvr_device_addr_and_size_are_valid(device_addr, size)) + return -EINVAL; + + INIT_LIST_HEAD(&op_ctx.returned_gpuvas); + + op_ctx.prev_va = kzalloc(sizeof(*op_ctx.prev_va), GFP_KERNEL); + op_ctx.next_va = kzalloc(sizeof(*op_ctx.next_va), GFP_KERNEL); + if (!op_ctx.prev_va || !op_ctx.next_va) { + err = -ENOMEM; + goto out; + } + + pvr_vm_gpuva_mapping_init(op_ctx.prev_va); + pvr_vm_gpuva_mapping_init(op_ctx.next_va); + + op_ctx.mmu_op_ctx = + pvr_mmu_op_context_create(vm_ctx->mmu_ctx, NULL, 0, 0); + err = PTR_ERR_OR_ZERO(op_ctx.mmu_op_ctx); + if (err) { + op_ctx.mmu_op_ctx = NULL; + goto out; + } + + mutex_lock(&vm_ctx->lock); + err = drm_gpuvm_sm_unmap(&vm_ctx->gpuvm_mgr, &op_ctx, device_addr, size); + mutex_unlock(&vm_ctx->lock); + +out: + pvr_mmu_op_context_destroy(op_ctx.mmu_op_ctx); + kfree(op_ctx.next_va); + kfree(op_ctx.prev_va); + + list_for_each_entry_safe(gpuva, tmp_gpuva, &op_ctx.returned_gpuvas, + node) { + drm_gem_object_put(gpuva->base.gem.obj); + list_del(&gpuva->node); + kfree(gpuva); + } + + return err; +} + +/* + * Static data areas are determined by firmware. + * + * When adding a new static data area you will also need to update the reserved_size field for the + * heap in pvr_heaps[]. + */ +static const struct drm_pvr_static_data_area static_data_areas[] = { + { + .area_usage = DRM_PVR_STATIC_DATA_AREA_FENCE, + .location_heap_id = DRM_PVR_HEAP_GENERAL, + .offset = 0, + .size = 128, + }, + { + .area_usage = DRM_PVR_STATIC_DATA_AREA_YUV_CSC, + .location_heap_id = DRM_PVR_HEAP_GENERAL, + .offset = 128, + .size = 1024, + }, + { + .area_usage = DRM_PVR_STATIC_DATA_AREA_VDM_SYNC, + .location_heap_id = DRM_PVR_HEAP_PDS_CODE_DATA, + .offset = 0, + .size = 128, + }, + { + .area_usage = DRM_PVR_STATIC_DATA_AREA_EOT, + .location_heap_id = DRM_PVR_HEAP_PDS_CODE_DATA, + .offset = 128, + .size = 128, + }, + { + .area_usage = DRM_PVR_STATIC_DATA_AREA_VDM_SYNC, + .location_heap_id = DRM_PVR_HEAP_USC_CODE, + .offset = 0, + .size = 128, + }, +}; + +#define GET_RESERVED_SIZE(last_offset, last_size) round_up((last_offset) + (last_size), PAGE_SIZE) + +/* + * The values given to GET_RESERVED_SIZE() are taken from the last entry in the corresponding + * static data area for each heap. + */ +static const struct drm_pvr_heap pvr_heaps[] = { + [DRM_PVR_HEAP_GENERAL] = { + .base = ROGUE_GENERAL_HEAP_BASE, + .size = ROGUE_GENERAL_HEAP_SIZE, + .flags = 0, + .page_size_log2 = PVR_DEVICE_PAGE_SHIFT, + }, + [DRM_PVR_HEAP_PDS_CODE_DATA] = { + .base = ROGUE_PDSCODEDATA_HEAP_BASE, + .size = ROGUE_PDSCODEDATA_HEAP_SIZE, + .flags = 0, + .page_size_log2 = PVR_DEVICE_PAGE_SHIFT, + }, + [DRM_PVR_HEAP_USC_CODE] = { + .base = ROGUE_USCCODE_HEAP_BASE, + .size = ROGUE_USCCODE_HEAP_SIZE, + .flags = 0, + .page_size_log2 = PVR_DEVICE_PAGE_SHIFT, + }, + [DRM_PVR_HEAP_RGNHDR] = { + .base = ROGUE_RGNHDR_HEAP_BASE, + .size = ROGUE_RGNHDR_HEAP_SIZE, + .flags = 0, + .page_size_log2 = PVR_DEVICE_PAGE_SHIFT, + }, + [DRM_PVR_HEAP_VIS_TEST] = { + .base = ROGUE_VISTEST_HEAP_BASE, + .size = ROGUE_VISTEST_HEAP_SIZE, + .flags = 0, + .page_size_log2 = PVR_DEVICE_PAGE_SHIFT, + }, + [DRM_PVR_HEAP_TRANSFER_FRAG] = { + .base = ROGUE_TRANSFER_FRAG_HEAP_BASE, + .size = ROGUE_TRANSFER_FRAG_HEAP_SIZE, + .flags = 0, + .page_size_log2 = PVR_DEVICE_PAGE_SHIFT, + }, +}; + +int +pvr_static_data_areas_get(const struct pvr_device *pvr_dev, + struct drm_pvr_ioctl_dev_query_args *args) +{ + struct drm_pvr_dev_query_static_data_areas query = {0}; + int err; + + if (!args->pointer) { + args->size = sizeof(struct drm_pvr_dev_query_static_data_areas); + return 0; + } + + err = PVR_UOBJ_GET(query, args->size, args->pointer); + if (err < 0) + return err; + + if (!query.static_data_areas.array) { + query.static_data_areas.count = ARRAY_SIZE(static_data_areas); + query.static_data_areas.stride = sizeof(struct drm_pvr_static_data_area); + goto copy_out; + } + + if (query.static_data_areas.count > ARRAY_SIZE(static_data_areas)) + query.static_data_areas.count = ARRAY_SIZE(static_data_areas); + + err = PVR_UOBJ_SET_ARRAY(&query.static_data_areas, static_data_areas); + if (err < 0) + return err; + +copy_out: + err = PVR_UOBJ_SET(args->pointer, args->size, query); + if (err < 0) + return err; + + args->size = sizeof(query); + return 0; +} + +int +pvr_heap_info_get(const struct pvr_device *pvr_dev, + struct drm_pvr_ioctl_dev_query_args *args) +{ + struct drm_pvr_dev_query_heap_info query = {0}; + u64 dest; + int err; + + if (!args->pointer) { + args->size = sizeof(struct drm_pvr_dev_query_heap_info); + return 0; + } + + err = PVR_UOBJ_GET(query, args->size, args->pointer); + if (err < 0) + return err; + + if (!query.heaps.array) { + query.heaps.count = ARRAY_SIZE(pvr_heaps); + query.heaps.stride = sizeof(struct drm_pvr_heap); + goto copy_out; + } + + if (query.heaps.count > ARRAY_SIZE(pvr_heaps)) + query.heaps.count = ARRAY_SIZE(pvr_heaps); + + /* Region header heap is only present if BRN63142 is present. */ + dest = query.heaps.array; + for (size_t i = 0; i < query.heaps.count; i++) { + struct drm_pvr_heap heap = pvr_heaps[i]; + + if (i == DRM_PVR_HEAP_RGNHDR && !PVR_HAS_QUIRK(pvr_dev, 63142)) + heap.size = 0; + + err = PVR_UOBJ_SET(dest, query.heaps.stride, heap); + if (err < 0) + return err; + + dest += query.heaps.stride; + } + +copy_out: + err = PVR_UOBJ_SET(args->pointer, args->size, query); + if (err < 0) + return err; + + args->size = sizeof(query); + return 0; +} + +/** + * pvr_heap_contains_range() - Determine if a given heap contains the specified + * device-virtual address range. + * @pvr_heap: Target heap. + * @start: Inclusive start of the target range. + * @end: Inclusive end of the target range. + * + * It is an error to call this function with values of @start and @end that do + * not satisfy the condition @start <= @end. + */ +static __always_inline bool +pvr_heap_contains_range(const struct drm_pvr_heap *pvr_heap, u64 start, u64 end) +{ + return pvr_heap->base <= start && end < pvr_heap->base + pvr_heap->size; +} + +/** + * pvr_find_heap_containing() - Find a heap which contains the specified + * device-virtual address range. + * @pvr_dev: Target PowerVR device. + * @start: Start of the target range. + * @size: Size of the target range. + * + * Return: + * * A pointer to a constant instance of struct drm_pvr_heap representing the + * heap containing the entire range specified by @start and @size on + * success, or + * * %NULL if no such heap exists. + */ +const struct drm_pvr_heap * +pvr_find_heap_containing(struct pvr_device *pvr_dev, u64 start, u64 size) +{ + u64 end; + + if (check_add_overflow(start, size - 1, &end)) + return NULL; + + /* + * There are no guarantees about the order of address ranges in + * &pvr_heaps, so iterate over the entire array for a heap whose + * range completely encompasses the given range. + */ + for (u32 heap_id = 0; heap_id < ARRAY_SIZE(pvr_heaps); heap_id++) { + /* Filter heaps that present only with an associated quirk */ + if (heap_id == DRM_PVR_HEAP_RGNHDR && + !PVR_HAS_QUIRK(pvr_dev, 63142)) { + continue; + } + + if (pvr_heap_contains_range(&pvr_heaps[heap_id], start, end)) + return &pvr_heaps[heap_id]; + } + + return NULL; +} + +/** + * pvr_vm_find_gem_object() - Look up a buffer object from a given + * device-virtual address. + * @vm_ctx: [IN] Target VM context. + * @device_addr: [IN] Virtual device address at the start of the required + * object. + * @mapped_offset_out: [OUT] Pointer to location to write offset of the start + * of the mapped region within the buffer object. May be + * %NULL if this information is not required. + * @mapped_size_out: [OUT] Pointer to location to write size of the mapped + * region. May be %NULL if this information is not required. + * + * If successful, a reference will be taken on the buffer object. The caller + * must drop the reference with pvr_gem_object_put(). + * + * Return: + * * The PowerVR buffer object mapped at @device_addr if one exists, or + * * %NULL otherwise. + */ +struct pvr_gem_object * +pvr_vm_find_gem_object(struct pvr_vm_context *vm_ctx, u64 device_addr, + u64 *mapped_offset_out, u64 *mapped_size_out) +{ + struct pvr_gem_object *pvr_obj; + struct drm_gpuva *va; + + mutex_lock(&vm_ctx->lock); + + va = drm_gpuva_find_first(&vm_ctx->gpuvm_mgr, device_addr, 1); + if (!va) + goto err_unlock; + + pvr_obj = gem_to_pvr_gem(va->gem.obj); + pvr_gem_object_get(pvr_obj); + + if (mapped_offset_out) + *mapped_offset_out = va->gem.offset; + if (mapped_size_out) + *mapped_size_out = va->va.range; + + mutex_unlock(&vm_ctx->lock); + + return pvr_obj; + +err_unlock: + mutex_unlock(&vm_ctx->lock); + + return NULL; +} + +/** + * pvr_vm_get_fw_mem_context: Get object representing firmware memory context + * @vm_ctx: Target VM context. + * + * Returns: + * * FW object representing firmware memory context, or + * * %NULL if this VM context does not have a firmware memory context. + */ +struct pvr_fw_object * +pvr_vm_get_fw_mem_context(struct pvr_vm_context *vm_ctx) +{ + return vm_ctx->fw_mem_ctx_obj; +} diff --git a/drivers/gpu/drm/imagination/pvr_vm.h b/drivers/gpu/drm/imagination/pvr_vm.h new file mode 100644 index 000000000000..b98bc3981807 --- /dev/null +++ b/drivers/gpu/drm/imagination/pvr_vm.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#ifndef PVR_VM_H +#define PVR_VM_H + +#include "pvr_rogue_mmu_defs.h" + +#include <uapi/drm/pvr_drm.h> + +#include <linux/types.h> + +/* Forward declaration from "pvr_device.h" */ +struct pvr_device; +struct pvr_file; + +/* Forward declaration from "pvr_gem.h" */ +struct pvr_gem_object; + +/* Forward declaration from "pvr_vm.c" */ +struct pvr_vm_context; + +/* Forward declaration from <uapi/drm/pvr_drm.h> */ +struct drm_pvr_ioctl_get_heap_info_args; + +/* Functions defined in pvr_vm.c */ + +bool pvr_device_addr_is_valid(u64 device_addr); +bool pvr_device_addr_and_size_are_valid(u64 device_addr, u64 size); + +struct pvr_vm_context *pvr_vm_create_context(struct pvr_device *pvr_dev, + bool is_userspace_context); + +int pvr_vm_map(struct pvr_vm_context *vm_ctx, + struct pvr_gem_object *pvr_obj, u64 pvr_obj_offset, + u64 device_addr, u64 size); +int pvr_vm_unmap(struct pvr_vm_context *vm_ctx, u64 device_addr, u64 size); + +dma_addr_t pvr_vm_get_page_table_root_addr(struct pvr_vm_context *vm_ctx); + +int pvr_static_data_areas_get(const struct pvr_device *pvr_dev, + struct drm_pvr_ioctl_dev_query_args *args); +int pvr_heap_info_get(const struct pvr_device *pvr_dev, + struct drm_pvr_ioctl_dev_query_args *args); +const struct drm_pvr_heap *pvr_find_heap_containing(struct pvr_device *pvr_dev, + u64 addr, u64 size); + +struct pvr_gem_object *pvr_vm_find_gem_object(struct pvr_vm_context *vm_ctx, + u64 device_addr, + u64 *mapped_offset_out, + u64 *mapped_size_out); + +struct pvr_fw_object * +pvr_vm_get_fw_mem_context(struct pvr_vm_context *vm_ctx); + +struct pvr_vm_context *pvr_vm_context_lookup(struct pvr_file *pvr_file, u32 handle); +bool pvr_vm_context_put(struct pvr_vm_context *vm_ctx); +void pvr_destroy_vm_contexts_for_file(struct pvr_file *pvr_file); + +#endif /* PVR_VM_H */ -- 2.42.0