A maple tree is used to maintain a map from guest address ranges to the
guestmemfd that provides the memory backing that range for the guest.
The mapping of guest address range to guestmemfd is called a binding.
Implement an ioctl to add/remove bindings to the virtual machine. The
binding determines whether the memory is shared (host retains access)
or lent (host loses access).

Signed-off-by: Elliot Berman <quic_eberman@xxxxxxxxxxx>
---
 drivers/virt/gunyah/guest_memfd.c | 286 ++++++++++++++++++++++++++++++++++++++
 drivers/virt/gunyah/vm_mgr.c      |  15 +++
 drivers/virt/gunyah/vm_mgr.h      |   6 +
 include/uapi/linux/gunyah.h       |  44 ++++++
 4 files changed, 351 insertions(+)

diff --git a/drivers/virt/gunyah/guest_memfd.c b/drivers/virt/gunyah/guest_memfd.c
index 709aae9a1f44..c38380c4dc50 100644
--- a/drivers/virt/gunyah/guest_memfd.c
+++ b/drivers/virt/gunyah/guest_memfd.c
@@ -9,11 +9,61 @@
 #include <linux/types.h>
 #include <linux/falloc.h>
 #include <linux/file.h>
+#include <linux/maple_tree.h>
 #include <linux/migrate.h>
 #include <linux/pagemap.h>
 
 #include <uapi/linux/gunyah.h>
 
+#include "vm_mgr.h"
+
+/**
+ * struct gunyah_gmem_binding - Represents a binding of guestmem to a Gunyah VM
+ * @gfn: Guest address to place acquired folios
+ * @ghvm: Pointer to Gunyah VM in this binding
+ * @mt: Maple tree to track folios which have been provided to the VM
+ * @i_off: offset into the guestmem to grab folios from
+ * @inode: Pointer to guest mem inode
+ * @i_entry: list entry for inode->i_private_list
+ * @flags: Access flags for the binding
+ * @nr: Number of pages covered by this binding
+ */
+struct gunyah_gmem_binding {
+	u64 gfn;
+	struct gunyah_vm *ghvm;
+	struct maple_tree mt;
+
+	pgoff_t i_off;
+	struct inode *inode;
+	struct list_head i_entry;
+
+	u32 flags;
+	unsigned long nr;
+};
+
+static inline pgoff_t gunyah_gfn_to_off(struct gunyah_gmem_binding *b, u64 gfn)
+{
+	return gfn - b->gfn + b->i_off;
+}
+
+static inline u64 gunyah_off_to_gfn(struct gunyah_gmem_binding *b, pgoff_t off)
+{
+	return off - b->i_off + b->gfn;
+}
+
+static inline bool gunyah_guest_mem_is_lend(struct gunyah_vm *ghvm, u32 flags)
+{
+	u8 access = flags & GUNYAH_MEM_ACCESS_MASK;
+
+	if (access == GUNYAH_MEM_FORCE_LEND)
+		return true;
+	else if (access == GUNYAH_MEM_FORCE_SHARE)
+		return false;
+
+	/* RM requires all VMs to be protected (isolated) */
+	return true;
+}
+
 static struct folio *gunyah_gmem_get_huge_folio(struct inode *inode,
 						pgoff_t index)
 {
@@ -191,8 +241,15 @@ static long gunyah_gmem_fallocate(struct file *file, int mode, loff_t offset,
 
 static int gunyah_gmem_release(struct inode *inode, struct file *file)
 {
+	struct gunyah_gmem_binding *b, *n;
+
 	gunyah_gmem_punch_hole(inode, 0, U64_MAX);
 
+	list_for_each_entry_safe(b, n, &inode->i_mapping->i_private_list,
+				 i_entry) {
+		gunyah_gmem_remove_binding(b);
+	}
+
 	return 0;
 }
 
@@ -267,3 +324,232 @@ int gunyah_guest_mem_create(struct gunyah_create_mem_args *args)
 	put_unused_fd(fd);
 	return err;
 }
+
+void gunyah_gmem_remove_binding(struct gunyah_gmem_binding *b)
+{
+	mtree_erase(&b->ghvm->mem_layout, b->gfn);
+	list_del(&b->i_entry);
+	kfree(b);
+}
+
+static inline unsigned long gunyah_gmem_page_mask(struct inode *inode)
+{
+	unsigned long gmem_flags = (unsigned long)inode->i_private;
+
+	if (gmem_flags & GHMF_ALLOW_HUGEPAGE) {
+#if IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)
+		return HPAGE_PMD_MASK;
+#else
+		return ULONG_MAX;
+#endif
+	}
+
+	return PAGE_MASK;
+}
+
+static int gunyah_gmem_init_binding(struct gunyah_vm *ghvm, struct inode *inode,
+				    struct gunyah_map_mem_args *args,
+				    struct gunyah_gmem_binding *binding)
+{
+	const unsigned long page_mask = ~gunyah_gmem_page_mask(inode);
+
+	if (args->flags & ~(GUNYAH_MEM_ALLOW_RWX | GUNYAH_MEM_ACCESS_MASK))
+		return -EINVAL;
+
+	if (args->guest_addr & page_mask)
+		return -EINVAL;
+
+	if (args->offset & page_mask)
+		return -EINVAL;
+
+	if (args->size & page_mask)
+		return -EINVAL;
+
+	binding->gfn = gunyah_gpa_to_gfn(args->guest_addr);
+	binding->ghvm = ghvm;
+	binding->i_off = args->offset >> PAGE_SHIFT;
+	binding->inode = inode;
+	binding->flags = args->flags;
+	binding->nr = args->size >> PAGE_SHIFT;
+
+	return 0;
+}
+
+static int gunyah_gmem_remove_mapping(struct gunyah_vm *ghvm,
+				      struct inode *inode,
+				      struct gunyah_map_mem_args *args)
+{
+	struct gunyah_gmem_binding argb;
+	struct gunyah_gmem_binding *b = NULL;
+	unsigned long start_delta, end_delta;
+	int ret;
+
+	ret = gunyah_gmem_init_binding(ghvm, inode, args, &argb);
+	if (ret)
+		return ret;
+
+	filemap_invalidate_lock(inode->i_mapping);
+	list_for_each_entry(b, &inode->i_mapping->i_private_list, i_entry) {
+		if (b->ghvm != argb.ghvm || b->flags != argb.flags ||
+		    WARN_ON(b->inode != argb.inode))
+			continue;
+		/* Check if argb guest addresses is within b */
+		if (b->gfn > argb.gfn)
+			continue;
+		if (b->gfn + b->nr < argb.gfn + argb.nr)
+			continue;
+		start_delta = argb.gfn - b->gfn;
+		if (argb.i_off - b->i_off != start_delta)
+			continue;
+		end_delta = argb.gfn + argb.nr - b->gfn - b->nr;
+		if (!start_delta && !end_delta) {
+			/* wipe the mapping entirely */
+			gunyah_gmem_remove_binding(b);
+			goto out;
+		} else if (start_delta && !end_delta) {
+			/* shrink the end */
+			down_write(&ghvm->mem_lock);
+			mtree_erase(&b->ghvm->mem_layout, b->gfn);
+			b->nr = start_delta;
+			ret = mtree_insert_range(&ghvm->mem_layout, b->gfn,
+						 b->gfn + b->nr - 1, b,
+						 GFP_KERNEL);
+			up_write(&ghvm->mem_lock);
+			goto out;
+		} else if (!start_delta && end_delta) {
+			/* Shrink the beginning */
+			down_write(&ghvm->mem_lock);
+			mtree_erase(&b->ghvm->mem_layout, b->gfn);
+			b->gfn += argb.nr;
+			b->i_off += argb.nr;
+			b->nr -= argb.nr;
+			ret = mtree_insert_range(&ghvm->mem_layout, b->gfn,
+						 b->gfn + b->nr - 1, b,
+						 GFP_KERNEL);
+			up_write(&ghvm->mem_lock);
+			goto out;
+		} else {
+			/* TODO: split the mapping into 2 */
+			ret = -EINVAL;
+			goto out;
+		}
+	}
+	ret = -ENOENT;
+out:
+	filemap_invalidate_unlock(inode->i_mapping);
+	return ret;
+}
+
+static bool gunyah_gmem_binding_allowed_overlap(struct gunyah_gmem_binding *a,
+						struct gunyah_gmem_binding *b)
+{
+	/* Bindings can't overlap within a VM. Only one guest mem can
+	 * provide for a given guest address
+	 */
+	if (a->ghvm == b->ghvm && a->gfn < b->gfn + b->nr &&
+	    b->gfn < a->gfn + a->nr)
+		return false;
+
+	/* Gunyah only guarantees we can share a page with one VM and
+	 * doesn't (currently) allow us to share same page with multiple VMs,
+	 * regardless whether host can also access.
+	 */
+	if (a->inode == b->inode) {
+		if (a->ghvm == b->ghvm) {
+			if (gunyah_guest_mem_is_lend(a->ghvm, a->flags) ||
+			    gunyah_guest_mem_is_lend(b->ghvm, b->flags))
+				return false;
+		} else {
+			/* Different VMs must not be given the same pages */
+			if (a->i_off < b->i_off + b->nr &&
+			    b->i_off < a->i_off + a->nr)
+				return false;
+		}
+	}
+
+	return true;
+}
+
+/*
+ * Create a binding for @args and publish it in both the VM's mem_layout tree
+ * and the inode's binding list. The binding is freed again on any failure.
+ */
+static int gunyah_gmem_add_mapping(struct gunyah_vm *ghvm, struct inode *inode,
+				   struct gunyah_map_mem_args *args)
+{
+	struct gunyah_gmem_binding *b, *tmp = NULL;
+	int ret;
+
+	b = kzalloc(sizeof(*b), GFP_KERNEL);
+	if (!b)
+		return -ENOMEM;
+
+	ret = gunyah_gmem_init_binding(ghvm, inode, args, b);
+	if (ret)
+		goto err_free;
+
+	filemap_invalidate_lock(inode->i_mapping);
+	list_for_each_entry(tmp, &inode->i_mapping->i_private_list, i_entry) {
+		if (!gunyah_gmem_binding_allowed_overlap(b, tmp)) {
+			ret = -EEXIST;
+			goto unlock;
+		}
+	}
+
+	ret = mtree_insert_range(&ghvm->mem_layout, b->gfn, b->gfn + b->nr - 1,
+				 b, GFP_KERNEL);
+	if (ret)
+		goto unlock;
+
+	list_add(&b->i_entry, &inode->i_mapping->i_private_list);
+	filemap_invalidate_unlock(inode->i_mapping);
+	return 0;
+
+unlock:
+	filemap_invalidate_unlock(inode->i_mapping);
+err_free:
+	/* The binding was never published, so it is still ours to free */
+	kfree(b);
+	return ret;
+}
+
+int gunyah_gmem_modify_binding(struct gunyah_vm *ghvm,
+			       struct gunyah_map_mem_args *args)
+{
+	u8 access = args->flags & GUNYAH_MEM_ACCESS_MASK;
+	struct file *file;
+	int ret = -EINVAL;
+
+	file = fget(args->guest_mem_fd);
+	if (!file)
+		return -EINVAL;
+
+	if (file->f_op != &gunyah_gmem_fops)
+		goto err_file;
+
+	if (args->flags & ~(GUNYAH_MEM_ALLOW_RWX | GUNYAH_MEM_UNMAP | GUNYAH_MEM_ACCESS_MASK))
+		goto err_file;
+
+	/* VM needs to have some permissions to the memory */
+	if (!(args->flags & GUNYAH_MEM_ALLOW_RWX))
+		goto err_file;
+
+	if (access != GUNYAH_MEM_DEFAULT_ACCESS &&
+	    access != GUNYAH_MEM_FORCE_LEND && access != GUNYAH_MEM_FORCE_SHARE)
+		goto err_file;
+
+	if (!PAGE_ALIGNED(args->guest_addr) || !PAGE_ALIGNED(args->offset) ||
+	    !PAGE_ALIGNED(args->size))
+		goto err_file;
+
+	if (args->flags & GUNYAH_MEM_UNMAP) {
+		args->flags &= ~GUNYAH_MEM_UNMAP;
+		ret = gunyah_gmem_remove_mapping(ghvm, file_inode(file), args);
+	} else {
+		ret = gunyah_gmem_add_mapping(ghvm, file_inode(file), args);
+	}
+
+err_file:
+	fput(file);
+	return ret;
+}
diff --git a/drivers/virt/gunyah/vm_mgr.c b/drivers/virt/gunyah/vm_mgr.c
index cd978d1ce93f..5666070453aa 100644
--- a/drivers/virt/gunyah/vm_mgr.c
+++ b/drivers/virt/gunyah/vm_mgr.c
@@ -519,6 +519,8 @@ static __must_check struct gunyah_vm *gunyah_vm_alloc(struct gunyah_rm *rm)
 
 	mutex_init(&ghvm->fn_lock);
 	mt_init(&ghvm->gm);
+	mt_init(&ghvm->mem_layout);
+	init_rwsem(&ghvm->mem_lock);
 
 	ghvm->addrspace_ticket.resource_type = GUNYAH_RESOURCE_TYPE_ADDR_SPACE;
 	ghvm->addrspace_ticket.label = GUNYAH_VM_ADDRSPACE_LABEL;
@@ -673,6 +675,14 @@ static long gunyah_vm_ioctl(struct file *filp, unsigned int cmd,
 		r = gunyah_vm_rm_function_instance(ghvm, &f);
 		break;
 	}
+	case GUNYAH_VM_MAP_MEM: {
+		struct gunyah_map_mem_args args;
+
+		if (copy_from_user(&args, argp, sizeof(args)))
+			return -EFAULT;
+
+		return gunyah_gmem_modify_binding(ghvm, &args);
+	}
 	default:
 		r = -ENOTTY;
 		break;
@@ -690,6 +700,8 @@ EXPORT_SYMBOL_GPL(gunyah_vm_get);
 static void _gunyah_vm_put(struct kref *kref)
 {
 	struct gunyah_vm *ghvm = container_of(kref, struct gunyah_vm, kref);
+	struct gunyah_gmem_binding *b;
+	unsigned long idx = 0;
 	int ret;
 
 	if (ghvm->vm_status == GUNYAH_RM_VM_STATUS_RUNNING)
@@ -697,6 +709,9 @@ static void _gunyah_vm_put(struct kref *kref)
 
 	gunyah_vm_remove_functions(ghvm);
 
+	mt_for_each(&ghvm->mem_layout, b, idx, ULONG_MAX)
+		gunyah_gmem_remove_binding(b);
+	mtree_destroy(&ghvm->mem_layout);
 	gunyah_vm_reclaim_memory(ghvm);
 
 	gunyah_vm_remove_resource_ticket(ghvm, &ghvm->addrspace_ticket);
diff --git a/drivers/virt/gunyah/vm_mgr.h b/drivers/virt/gunyah/vm_mgr.h
index d26693d10d22..8f1c3ade08dd 100644
--- a/drivers/virt/gunyah/vm_mgr.h
+++ b/drivers/virt/gunyah/vm_mgr.h
@@ -36,6 +36,8 @@ long gunyah_dev_vm_mgr_ioctl(struct gunyah_rm *rm, unsigned int cmd,
 struct gunyah_vm {
 	u16 vmid;
 	struct maple_tree gm;
+	struct maple_tree mem_layout;
+	struct rw_semaphore mem_lock;
 	struct gunyah_vm_resource_ticket addrspace_ticket,
 		host_private_extent_ticket, host_shared_extent_ticket,
 		guest_private_extent_ticket, guest_shared_extent_ticket;
@@ -78,5 +80,9 @@ void gunyah_vm_reclaim_memory(struct gunyah_vm *ghvm);
 int gunyah_vm_mmio_write(struct gunyah_vm *ghvm, u64 addr, u32 len, u64 data);
 
 int gunyah_guest_mem_create(struct gunyah_create_mem_args *args);
+int gunyah_gmem_modify_binding(struct gunyah_vm *ghvm,
+			       struct gunyah_map_mem_args *args);
+struct gunyah_gmem_binding;
+void gunyah_gmem_remove_binding(struct gunyah_gmem_binding *binding);
 
 #endif
diff --git a/include/uapi/linux/gunyah.h b/include/uapi/linux/gunyah.h
index c5f506350364..1af4c5ae6bc3 100644
--- a/include/uapi/linux/gunyah.h
+++ b/include/uapi/linux/gunyah.h
@@ -87,6 +87,50 @@ struct gunyah_fn_desc {
 #define GUNYAH_VM_ADD_FUNCTION	_IOW(GUNYAH_IOCTL_TYPE, 0x4, struct gunyah_fn_desc)
 #define GUNYAH_VM_REMOVE_FUNCTION	_IOW(GUNYAH_IOCTL_TYPE, 0x7, struct gunyah_fn_desc)
 
+/**
+ * enum gunyah_map_flags - Possible flags on &struct gunyah_map_mem_args
+ * @GUNYAH_MEM_DEFAULT_ACCESS: Use default host access for the VM type
+ * @GUNYAH_MEM_FORCE_LEND: Force unmapping the memory once the guest starts to use
+ * @GUNYAH_MEM_FORCE_SHARE: Allow host to continue accessing memory when guest starts to use
+ * @GUNYAH_MEM_ALLOW_READ: Allow guest to read memory
+ * @GUNYAH_MEM_ALLOW_WRITE: Allow guest to write to the memory
+ * @GUNYAH_MEM_ALLOW_EXEC: Allow guest to execute instructions in the memory
+ * @GUNYAH_MEM_ALLOW_RWX: Mask of all guest access permission flags
+ * @GUNYAH_MEM_UNMAP: Remove the described binding instead of adding it
+ */
+enum gunyah_map_flags {
+	GUNYAH_MEM_DEFAULT_ACCESS = 0,
+	GUNYAH_MEM_FORCE_LEND = 1,
+	GUNYAH_MEM_FORCE_SHARE = 2,
+#define GUNYAH_MEM_ACCESS_MASK 0x7
+
+	GUNYAH_MEM_ALLOW_READ = 1UL << 4,
+	GUNYAH_MEM_ALLOW_WRITE = 1UL << 5,
+	GUNYAH_MEM_ALLOW_EXEC = 1UL << 6,
+	GUNYAH_MEM_ALLOW_RWX =
+		(GUNYAH_MEM_ALLOW_READ | GUNYAH_MEM_ALLOW_WRITE | GUNYAH_MEM_ALLOW_EXEC),
+
+	GUNYAH_MEM_UNMAP = 1UL << 8,
+};
+
+/**
+ * struct gunyah_map_mem_args - Description to provide guest memory into a VM
+ * @guest_addr: Location in guest address space to place the memory
+ * @flags: See &enum gunyah_map_flags.
+ * @guest_mem_fd: File descriptor created by GUNYAH_CREATE_GUEST_MEM
+ * @offset: Offset into the guest memory file
+ * @size: Size of the region to bind, in bytes; must be page-aligned
+ */
+struct gunyah_map_mem_args {
+	__u64 guest_addr;
+	__u32 flags;
+	__u32 guest_mem_fd;
+	__u64 offset;
+	__u64 size;
+};
+
+#define GUNYAH_VM_MAP_MEM _IOW(GUNYAH_IOCTL_TYPE, 0x9, struct gunyah_map_mem_args)
+
 /*
  * ioctls for vCPU fds
  */
-- 
2.43.0