From: Sean Christopherson <seanjc@xxxxxxxxxx> Add support for creating "private" memslots via KVM_CREATE_GUEST_MEMFD and KVM_SET_USER_MEMORY_REGION2. Make vm_userspace_mem_region_add() a wrapper to its effective replacement, vm_mem_add(), so that private memslots are fully opt-in, i.e. don't require update all tests that add memory regions. Pivot on the KVM_MEM_PRIVATE flag instead of the validity of the "gmem" file descriptor so that simple tests can let vm_mem_add() do the heavy lifting of creating the guest memfd, but also allow the caller to pass in an explicit fd+offset so that fancier tests can do things like back multiple memslots with a single file. If the caller passes in a fd, dup() the fd so that (a) __vm_mem_region_delete() can close the fd associated with the memory region without needing yet another flag, and (b) so that the caller can safely close its copy of the fd without having to first destroy memslots. Co-developed-by: Ackerley Tng <ackerleytng@xxxxxxxxxx> Signed-off-by: Ackerley Tng <ackerleytng@xxxxxxxxxx> Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx> Message-Id: <20231027182217.3615211-27-seanjc@xxxxxxxxxx> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx> --- .../selftests/kvm/include/kvm_util_base.h | 23 ++++++ .../testing/selftests/kvm/include/test_util.h | 5 ++ tools/testing/selftests/kvm/lib/kvm_util.c | 76 +++++++++++-------- 3 files changed, 73 insertions(+), 31 deletions(-) diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h index 9f144841c2ee..9f861182c02a 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -431,6 +431,26 @@ static inline uint64_t vm_get_stat(struct kvm_vm *vm, const char *stat_name) void vm_create_irqchip(struct kvm_vm *vm); +static inline int __vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size, + uint64_t flags) +{ + struct kvm_create_guest_memfd guest_memfd = { + .size = size, + .flags = flags, + }; + + return __vm_ioctl(vm, KVM_CREATE_GUEST_MEMFD, &guest_memfd); +} + +static inline int vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size, + uint64_t flags) +{ + int fd = __vm_create_guest_memfd(vm, size, flags); + + TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_GUEST_MEMFD, fd)); + return fd; +} + void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, uint64_t gpa, uint64_t size, void *hva); int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, @@ -439,6 +459,9 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, uint64_t guest_paddr, uint32_t slot, uint64_t npages, uint32_t flags); +void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, + uint64_t guest_paddr, uint32_t slot, uint64_t npages, + uint32_t flags, int guest_memfd_fd, uint64_t guest_memfd_offset); void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags); void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa); diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index 7e614adc6cf4..7257f2243ab9 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -142,6 +142,11 @@ static inline bool backing_src_is_shared(enum vm_mem_backing_src_type t) return vm_mem_backing_src_alias(t)->flag & MAP_SHARED; } +static inline bool backing_src_can_be_huge(enum vm_mem_backing_src_type t) +{ + return t != VM_MEM_SRC_ANONYMOUS && t != VM_MEM_SRC_SHMEM; +} + /* Aligns x up to the next multiple of size. Size must be a power of 2. */ static inline uint64_t align_up(uint64_t x, uint64_t size) { diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 3676b37bea38..b63500fca627 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -669,6 +669,8 @@ static void __vm_mem_region_delete(struct kvm_vm *vm, TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); close(region->fd); } + if (region->region.guest_memfd >= 0) + close(region->region.guest_memfd); free(region); } @@ -870,36 +872,15 @@ void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, errno, strerror(errno)); } -/* - * VM Userspace Memory Region Add - * - * Input Args: - * vm - Virtual Machine - * src_type - Storage source for this region. - * NULL to use anonymous memory. - * guest_paddr - Starting guest physical address - * slot - KVM region slot - * npages - Number of physical pages - * flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES) - * - * Output Args: None - * - * Return: None - * - * Allocates a memory area of the number of pages specified by npages - * and maps it to the VM specified by vm, at a starting physical address - * given by guest_paddr. The region is created with a KVM region slot - * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM. The - * region is created with the flags given by flags. - */ -void vm_userspace_mem_region_add(struct kvm_vm *vm, - enum vm_mem_backing_src_type src_type, - uint64_t guest_paddr, uint32_t slot, uint64_t npages, - uint32_t flags) +/* FIXME: This thing needs to be ripped apart and rewritten. */ +void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, + uint64_t guest_paddr, uint32_t slot, uint64_t npages, + uint32_t flags, int guest_memfd, uint64_t guest_memfd_offset) { int ret; struct userspace_mem_region *region; size_t backing_src_pagesz = get_backing_src_pagesz(src_type); + size_t mem_size = npages * vm->page_size; size_t alignment; TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages, @@ -952,7 +933,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, /* Allocate and initialize new mem region structure. */ region = calloc(1, sizeof(*region)); TEST_ASSERT(region != NULL, "Insufficient Memory"); - region->mmap_size = npages * vm->page_size; + region->mmap_size = mem_size; #ifdef __s390x__ /* On s390x, the host address must be aligned to 1M (due to PGSTEs) */ @@ -999,14 +980,38 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, /* As needed perform madvise */ if ((src_type == VM_MEM_SRC_ANONYMOUS || src_type == VM_MEM_SRC_ANONYMOUS_THP) && thp_configured()) { - ret = madvise(region->host_mem, npages * vm->page_size, + ret = madvise(region->host_mem, mem_size, src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE); TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %s", - region->host_mem, npages * vm->page_size, + region->host_mem, mem_size, vm_mem_backing_src_alias(src_type)->name); } region->backing_src_type = src_type; + + if (flags & KVM_MEM_GUEST_MEMFD) { + if (guest_memfd < 0) { + uint32_t guest_memfd_flags = 0; + TEST_ASSERT(!guest_memfd_offset, + "Offset must be zero when creating new guest_memfd"); + guest_memfd = vm_create_guest_memfd(vm, mem_size, guest_memfd_flags); + } else { + /* + * Install a unique fd for each memslot so that the fd + * can be closed when the region is deleted without + * needing to track if the fd is owned by the framework + * or by the caller. + */ + guest_memfd = dup(guest_memfd); + TEST_ASSERT(guest_memfd >= 0, __KVM_SYSCALL_ERROR("dup()", guest_memfd)); + } + + region->region.guest_memfd = guest_memfd; + region->region.guest_memfd_offset = guest_memfd_offset; + } else { + region->region.guest_memfd = -1; + } + region->unused_phy_pages = sparsebit_alloc(); sparsebit_set_num(region->unused_phy_pages, guest_paddr >> vm->page_shift, npages); @@ -1019,9 +1024,10 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n" " rc: %i errno: %i\n" " slot: %u flags: 0x%x\n" - " guest_phys_addr: 0x%lx size: 0x%lx", + " guest_phys_addr: 0x%lx size: 0x%lx guest_memfd: %d\n", ret, errno, slot, flags, - guest_paddr, (uint64_t) region->region.memory_size); + guest_paddr, (uint64_t) region->region.memory_size, + region->region.guest_memfd); /* Add to quick lookup data structures */ vm_userspace_mem_region_gpa_insert(&vm->regions.gpa_tree, region); @@ -1042,6 +1048,14 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, } } +void vm_userspace_mem_region_add(struct kvm_vm *vm, + enum vm_mem_backing_src_type src_type, + uint64_t guest_paddr, uint32_t slot, + uint64_t npages, uint32_t flags) +{ + vm_mem_add(vm, src_type, guest_paddr, slot, npages, flags, -1, 0); +} + /* * Memslot to region * -- 2.39.1