From: "Yingshiuan Pan" <yingshiuan.pan@xxxxxxxxxxxx> Direct use of physical memory from VMs is forbidden and designed to be dictated to the privilege models managed by GenieZone hypervisor for security reason. With the help of gzvm-ko, the hypervisor would be able to manipulate memory as objects. And the memory management is highly integrated with ARM 2-stage translation tables to convert VA to IPA to PA under proper security measures required by protected VMs. Signed-off-by: Yingshiuan Pan <yingshiuan.pan@xxxxxxxxxxxx> Signed-off-by: Jerry Wang <ze-yu.wang@xxxxxxxxxxxx> Signed-off-by: Liju Chen <liju-clr.chen@xxxxxxxxxxxx> Signed-off-by: Yi-De Wu <yi-de.wu@xxxxxxxxxxxx> --- arch/arm64/geniezone/gzvm_arch_common.h | 2 + arch/arm64/geniezone/vm.c | 34 +++++ drivers/virt/geniezone/Makefile | 3 +- drivers/virt/geniezone/gzvm_mmu.c | 108 ++++++++++++++ drivers/virt/geniezone/gzvm_vm.c | 182 ++++++++++++++++++++++++ include/linux/gzvm_drv.h | 44 ++++++ include/uapi/linux/gzvm.h | 26 ++++ 7 files changed, 398 insertions(+), 1 deletion(-) create mode 100644 drivers/virt/geniezone/gzvm_mmu.c diff --git a/arch/arm64/geniezone/gzvm_arch_common.h b/arch/arm64/geniezone/gzvm_arch_common.h index f4439e43b018..14d288299435 100644 --- a/arch/arm64/geniezone/gzvm_arch_common.h +++ b/arch/arm64/geniezone/gzvm_arch_common.h @@ -11,6 +11,7 @@ enum { GZVM_FUNC_CREATE_VM = 0, GZVM_FUNC_DESTROY_VM = 1, + GZVM_FUNC_SET_MEMREGION = 4, GZVM_FUNC_PROBE = 12, NR_GZVM_FUNC, }; @@ -23,6 +24,7 @@ enum { #define MT_HVC_GZVM_CREATE_VM GZVM_HCALL_ID(GZVM_FUNC_CREATE_VM) #define MT_HVC_GZVM_DESTROY_VM GZVM_HCALL_ID(GZVM_FUNC_DESTROY_VM) +#define MT_HVC_GZVM_SET_MEMREGION GZVM_HCALL_ID(GZVM_FUNC_SET_MEMREGION) #define MT_HVC_GZVM_PROBE GZVM_HCALL_ID(GZVM_FUNC_PROBE) /** diff --git a/arch/arm64/geniezone/vm.c b/arch/arm64/geniezone/vm.c index a17bc628a496..47b41ac47f7a 100644 --- a/arch/arm64/geniezone/vm.c +++ b/arch/arm64/geniezone/vm.c @@ -24,6 +24,15 @@ int gzvm_arch_probe(void) return 0; } +int gzvm_arch_set_memregion(u16 vm_id, size_t buf_size, + phys_addr_t region) +{ + struct arm_smccc_res res; + + return gzvm_hypcall_wrapper(MT_HVC_GZVM_SET_MEMREGION, vm_id, + buf_size, region, 0, 0, 0, 0, &res); +} + /** * gzvm_arch_create_vm() - create vm * @vm_type: VM type. Only supports Linux VM now. @@ -49,3 +58,28 @@ int gzvm_arch_destroy_vm(u16 vm_id) return gzvm_hypcall_wrapper(MT_HVC_GZVM_DESTROY_VM, vm_id, 0, 0, 0, 0, 0, 0, &res); } + +/** + * gzvm_hva_to_pa_arch() - converts hva to pa with arch-specific way + * @hva: Host virtual address. + * + * Return: GZVM_PA_ERR_BAD for translation error + */ +u64 gzvm_hva_to_pa_arch(u64 hva) +{ + unsigned long flags; + u64 par; + + local_irq_save(flags); + asm volatile("at s1e1r, %0" :: "r" (hva)); + isb(); + par = read_sysreg_par(); + local_irq_restore(flags); + + if (par & SYS_PAR_EL1_F) + return GZVM_PA_ERR_BAD; + par = par & PAR_PA47_MASK; + if (!par) + return GZVM_PA_ERR_BAD; + return par; +} diff --git a/drivers/virt/geniezone/Makefile b/drivers/virt/geniezone/Makefile index 066efddc0b9c..43eba5cbbf4e 100644 --- a/drivers/virt/geniezone/Makefile +++ b/drivers/virt/geniezone/Makefile @@ -6,5 +6,6 @@ GZVM_DIR ?= ../../../drivers/virt/geniezone -gzvm-y := $(GZVM_DIR)/gzvm_main.o $(GZVM_DIR)/gzvm_vm.o +gzvm-y := $(GZVM_DIR)/gzvm_main.o $(GZVM_DIR)/gzvm_mmu.o \ + $(GZVM_DIR)/gzvm_vm.o diff --git a/drivers/virt/geniezone/gzvm_mmu.c b/drivers/virt/geniezone/gzvm_mmu.c new file mode 100644 index 000000000000..17d696992d2c --- /dev/null +++ b/drivers/virt/geniezone/gzvm_mmu.c @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2023 MediaTek Inc. + */ + +#include <linux/gzvm_drv.h> + +/** + * hva_to_pa_fast() - converts hva to pa in generic fast way + * @hva: Host virtual address. + * + * Return: GZVM_PA_ERR_BAD for translation error + */ +u64 hva_to_pa_fast(u64 hva) +{ + struct page *page[1]; + u64 pfn; + + if (get_user_page_fast_only(hva, 0, page)) { + pfn = page_to_phys(page[0]); + put_page((struct page *)page); + return pfn; + } + return GZVM_PA_ERR_BAD; +} + +/** + * hva_to_pa_slow() - converts hva to pa in a slow way + * @hva: Host virtual address + * + * This function converts HVA to PA in a slow way because the target hva is not + * yet allocated and mapped in the host stage1 page table, we cannot find it + * directly from current page table. + * Thus, we have to allocate it and this operation is much slower than directly + * find via current page table. + * + * Context: This function may sleep + * Return: PA or GZVM_PA_ERR_BAD for translation error + */ +u64 hva_to_pa_slow(u64 hva) +{ + struct page *page = NULL; + u64 pfn = 0; + int npages; + + npages = get_user_pages_unlocked(hva, 1, &page, 0); + if (npages != 1) + return GZVM_PA_ERR_BAD; + + if (page) { + pfn = page_to_phys(page); + put_page(page); + return pfn; + } + + return GZVM_PA_ERR_BAD; +} + +static u64 __gzvm_gfn_to_pfn_memslot(struct gzvm_memslot *memslot, u64 gfn) +{ + u64 hva, pa; + + hva = gzvm_gfn_to_hva_memslot(memslot, gfn); + + pa = gzvm_hva_to_pa_arch(hva); + if (pa != GZVM_PA_ERR_BAD) + return PHYS_PFN(pa); + + pa = hva_to_pa_fast(hva); + if (pa != GZVM_PA_ERR_BAD) + return PHYS_PFN(pa); + + pa = hva_to_pa_slow(hva); + if (pa != GZVM_PA_ERR_BAD) + return PHYS_PFN(pa); + + return GZVM_PA_ERR_BAD; +} + +/** + * gzvm_gfn_to_pfn_memslot() - Translate gfn (guest ipa) to pfn (host pa), + * result is in @pfn + * @memslot: Pointer to struct gzvm_memslot. + * @gfn: Guest frame number. + * @pfn: Host page frame number. + * + * Return: + * * 0 - Succeed + * * -EFAULT - Failed to convert + */ +int gzvm_gfn_to_pfn_memslot(struct gzvm_memslot *memslot, u64 gfn, + u64 *pfn) +{ + u64 __pfn; + + if (!memslot) + return -EFAULT; + + __pfn = __gzvm_gfn_to_pfn_memslot(memslot, gfn); + if (__pfn == GZVM_PA_ERR_BAD) { + *pfn = 0; + return -EFAULT; + } + + *pfn = __pfn; + + return 0; +} diff --git a/drivers/virt/geniezone/gzvm_vm.c b/drivers/virt/geniezone/gzvm_vm.c index d5e850af924a..e0751811cce6 100644 --- a/drivers/virt/geniezone/gzvm_vm.c +++ b/drivers/virt/geniezone/gzvm_vm.c @@ -15,6 +15,187 @@ static DEFINE_MUTEX(gzvm_list_lock); static LIST_HEAD(gzvm_list); +u64 gzvm_gfn_to_hva_memslot(struct gzvm_memslot *memslot, u64 gfn) +{ + u64 offset = gfn - memslot->base_gfn; + + return memslot->userspace_addr + offset * PAGE_SIZE; +} + +/** + * fill_constituents() - Populate pa to buffer until full + * @consti: Pointer to struct mem_region_addr_range. + * @consti_cnt: Constituent count. + * @max_nr_consti: Maximum number of constituent count. + * @gfn: Guest frame number. + * @total_pages: Total page numbers. + * @slot: Pointer to struct gzvm_memslot. + * + * Return: how many pages we've fill in, negative if error + */ +static int fill_constituents(struct mem_region_addr_range *consti, + int *consti_cnt, int max_nr_consti, u64 gfn, + u32 total_pages, struct gzvm_memslot *slot) +{ + u64 pfn, prev_pfn, gfn_end; + int nr_pages = 1; + int i = 0; + + if (unlikely(total_pages == 0)) + return -EINVAL; + gfn_end = gfn + total_pages; + + /* entry 0 */ + if (gzvm_gfn_to_pfn_memslot(slot, gfn, &pfn) != 0) + return -EFAULT; + consti[0].address = PFN_PHYS(pfn); + consti[0].pg_cnt = 1; + gfn++; + prev_pfn = pfn; + + while (i < max_nr_consti && gfn < gfn_end) { + if (gzvm_gfn_to_pfn_memslot(slot, gfn, &pfn) != 0) + return -EFAULT; + if (pfn == (prev_pfn + 1)) { + consti[i].pg_cnt++; + } else { + i++; + if (i >= max_nr_consti) + break; + consti[i].address = PFN_PHYS(pfn); + consti[i].pg_cnt = 1; + } + prev_pfn = pfn; + gfn++; + nr_pages++; + } + if (i != max_nr_consti) + i++; + *consti_cnt = i; + + return nr_pages; +} + +/** + * register_memslot_addr_range() - Register memory region to GenieZone + * @gzvm: Pointer to struct gzvm + * @memslot: Pointer to struct gzvm_memslot + * + * Return: 0 for success, negative number for error + */ +static int +register_memslot_addr_range(struct gzvm *gzvm, struct gzvm_memslot *memslot) +{ + struct gzvm_memory_region_ranges *region; + u32 buf_size = PAGE_SIZE * 2; + int max_nr_consti, remain_pages; + u64 gfn, gfn_end; + + region = alloc_pages_exact(buf_size, GFP_KERNEL); + if (!region) + return -ENOMEM; + max_nr_consti = (buf_size - sizeof(*region)) / + sizeof(struct mem_region_addr_range); + + region->slot = memslot->slot_id; + remain_pages = memslot->npages; + gfn = memslot->base_gfn; + gfn_end = gfn + remain_pages; + while (gfn < gfn_end) { + int nr_pages = fill_constituents(region->constituents, + ®ion->constituent_cnt, + max_nr_consti, gfn, + remain_pages, memslot); + if (nr_pages < 0) { + pr_err("Failed to fill constituents\n"); + free_pages_exact(region, buf_size); + return nr_pages; + } + region->gpa = PFN_PHYS(gfn); + region->total_pages = nr_pages; + + remain_pages -= nr_pages; + gfn += nr_pages; + + if (gzvm_arch_set_memregion(gzvm->vm_id, buf_size, + virt_to_phys(region))) { + pr_err("Failed to register memregion to hypervisor\n"); + free_pages_exact(region, buf_size); + return -EFAULT; + } + } + free_pages_exact(region, buf_size); + return 0; +} + +/** + * gzvm_vm_ioctl_set_memory_region() - Set memory region of guest + * @gzvm: Pointer to struct gzvm. + * @mem: Input memory region from user. + * + * Return: 0 for success, negative number for error + * + * -EXIO - The memslot is out-of-range + * -EFAULT - Cannot find corresponding vma + * -EINVAL - Region size and VMA size mismatch + */ +static int +gzvm_vm_ioctl_set_memory_region(struct gzvm *gzvm, + struct gzvm_userspace_memory_region *mem) +{ + struct vm_area_struct *vma; + struct gzvm_memslot *memslot; + unsigned long size; + __u32 slot; + + slot = mem->slot; + if (slot >= GZVM_MAX_MEM_REGION) + return -ENXIO; + memslot = &gzvm->memslot[slot]; + + vma = vma_lookup(gzvm->mm, mem->userspace_addr); + if (!vma) + return -EFAULT; + + size = vma->vm_end - vma->vm_start; + if (size != mem->memory_size) + return -EINVAL; + + memslot->base_gfn = __phys_to_pfn(mem->guest_phys_addr); + memslot->npages = size >> PAGE_SHIFT; + memslot->userspace_addr = mem->userspace_addr; + memslot->vma = vma; + memslot->flags = mem->flags; + memslot->slot_id = mem->slot; + return register_memslot_addr_range(gzvm, memslot); +} + +/* gzvm_vm_ioctl() - Ioctl handler of VM FD */ +static long gzvm_vm_ioctl(struct file *filp, unsigned int ioctl, + unsigned long arg) +{ + long ret; + void __user *argp = (void __user *)arg; + struct gzvm *gzvm = filp->private_data; + + switch (ioctl) { + case GZVM_SET_USER_MEMORY_REGION: { + struct gzvm_userspace_memory_region userspace_mem; + + if (copy_from_user(&userspace_mem, argp, sizeof(userspace_mem))) { + ret = -EFAULT; + goto out; + } + ret = gzvm_vm_ioctl_set_memory_region(gzvm, &userspace_mem); + break; + } + default: + ret = -ENOTTY; + } +out: + return ret; +} + static void gzvm_destroy_vm(struct gzvm *gzvm) { pr_debug("VM-%u is going to be destroyed\n", gzvm->vm_id); @@ -42,6 +223,7 @@ static int gzvm_vm_release(struct inode *inode, struct file *filp) static const struct file_operations gzvm_vm_fops = { .release = gzvm_vm_release, + .unlocked_ioctl = gzvm_vm_ioctl, .llseek = noop_llseek, }; diff --git a/include/linux/gzvm_drv.h b/include/linux/gzvm_drv.h index f1dce23838e4..aa1eaf4d43b4 100644 --- a/include/linux/gzvm_drv.h +++ b/include/linux/gzvm_drv.h @@ -7,9 +7,16 @@ #define __GZVM_DRV_H__ #include <linux/list.h> +#include <linux/mm.h> #include <linux/mutex.h> #include <linux/gzvm.h> +/* + * For the normal physical address, the highest 12 bits should be zero, so we + * can mask bit 62 ~ bit 52 to indicate the error physical address + */ +#define GZVM_PA_ERR_BAD (0x7ffULL << 52) + #define INVALID_VM_ID 0xffff /* @@ -27,10 +34,39 @@ * The following data structures are for data transferring between driver and * hypervisor, and they're aligned with hypervisor definitions */ +#define GZVM_MAX_MEM_REGION 10 + +/* struct mem_region_addr_range - Identical to ffa memory constituent */ +struct mem_region_addr_range { + /* the base IPA of the constituent memory region, aligned to 4 kiB */ + __u64 address; + /* the number of 4 kiB pages in the constituent memory region. */ + __u32 pg_cnt; + __u32 reserved; +}; + +struct gzvm_memory_region_ranges { + __u32 slot; + __u32 constituent_cnt; + __u64 total_pages; + __u64 gpa; + struct mem_region_addr_range constituents[]; +}; + +/* struct gzvm_memslot - VM's memory slot descriptor */ +struct gzvm_memslot { + u64 base_gfn; /* begin of guest page frame */ + unsigned long npages; /* number of pages this slot covers */ + unsigned long userspace_addr; /* corresponding userspace va */ + struct vm_area_struct *vma; /* vma related to this userspace addr */ + u32 flags; + u32 slot_id; +}; struct gzvm { /* userspace tied to this vm */ struct mm_struct *mm; + struct gzvm_memslot memslot[GZVM_MAX_MEM_REGION]; /* lock for list_add*/ struct mutex lock; struct list_head vm_list; @@ -45,7 +81,15 @@ void gzvm_destroy_all_vms(void); /* arch-dependant functions */ int gzvm_arch_probe(void); +int gzvm_arch_set_memregion(u16 vm_id, size_t buf_size, + phys_addr_t region); int gzvm_arch_create_vm(unsigned long vm_type); int gzvm_arch_destroy_vm(u16 vm_id); +u64 gzvm_hva_to_pa_arch(u64 hva); +u64 hva_to_pa_fast(u64 hva); +u64 hva_to_pa_slow(u64 hva); +int gzvm_gfn_to_pfn_memslot(struct gzvm_memslot *memslot, u64 gfn, u64 *pfn); +u64 gzvm_gfn_to_hva_memslot(struct gzvm_memslot *memslot, u64 gfn); + #endif /* __GZVM_DRV_H__ */ diff --git a/include/uapi/linux/gzvm.h b/include/uapi/linux/gzvm.h index c26c7720fab7..d2d5e6cfc2c9 100644 --- a/include/uapi/linux/gzvm.h +++ b/include/uapi/linux/gzvm.h @@ -22,4 +22,30 @@ /* ioctls for /dev/gzvm fds */ #define GZVM_CREATE_VM _IO(GZVM_IOC_MAGIC, 0x01) /* Returns a Geniezone VM fd */ +/* ioctls for VM fds */ +/* for GZVM_SET_MEMORY_REGION */ +struct gzvm_memory_region { + __u32 slot; + __u32 flags; + __u64 guest_phys_addr; + __u64 memory_size; /* bytes */ +}; + +#define GZVM_SET_MEMORY_REGION _IOW(GZVM_IOC_MAGIC, 0x40, \ + struct gzvm_memory_region) + +/* for GZVM_SET_USER_MEMORY_REGION */ +struct gzvm_userspace_memory_region { + __u32 slot; + __u32 flags; + __u64 guest_phys_addr; + /* bytes */ + __u64 memory_size; + /* start of the userspace allocated memory */ + __u64 userspace_addr; +}; + +#define GZVM_SET_USER_MEMORY_REGION _IOW(GZVM_IOC_MAGIC, 0x46, \ + struct gzvm_userspace_memory_region) + #endif /* __GZVM_H__ */ -- 2.18.0