Hi,

Regarding the subject (and the commit message), should we still be
calling them "private" slots, or guestmem_slots?

On Sun, Nov 5, 2023 at 4:34 PM Paolo Bonzini <pbonzini@xxxxxxxxxx> wrote:
>
> From: Sean Christopherson <seanjc@xxxxxxxxxx>
>
> Add support for creating "private" memslots via KVM_CREATE_GUEST_MEMFD and
> KVM_SET_USER_MEMORY_REGION2. Make vm_userspace_mem_region_add() a wrapper
> to its effective replacement, vm_mem_add(), so that private memslots are
> fully opt-in, i.e. don't require update all tests that add memory regions.

nit: update->updating

>
> Pivot on the KVM_MEM_PRIVATE flag instead of the validity of the "gmem"

KVM_MEM_PRIVATE -> KVM_MEM_GUEST_MEMFD

> file descriptor so that simple tests can let vm_mem_add() do the heavy
> lifting of creating the guest memfd, but also allow the caller to pass in
> an explicit fd+offset so that fancier tests can do things like back
> multiple memslots with a single file. If the caller passes in a fd, dup()
> the fd so that (a) __vm_mem_region_delete() can close the fd associated
> with the memory region without needing yet another flag, and (b) so that
> the caller can safely close its copy of the fd without having to first
> destroy memslots.
>
> Co-developed-by: Ackerley Tng <ackerleytng@xxxxxxxxxx>
> Signed-off-by: Ackerley Tng <ackerleytng@xxxxxxxxxx>
> Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
> Message-Id: <20231027182217.3615211-27-seanjc@xxxxxxxxxx>
> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> ---
>  .../selftests/kvm/include/kvm_util_base.h     | 23 ++++++
>  .../testing/selftests/kvm/include/test_util.h |  5 ++
>  tools/testing/selftests/kvm/lib/kvm_util.c    | 76 +++++++++++--------
>  3 files changed, 73 insertions(+), 31 deletions(-)
>
> diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
> index 9f144841c2ee..9f861182c02a 100644
> --- a/tools/testing/selftests/kvm/include/kvm_util_base.h
> +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
> @@ -431,6 +431,26 @@ static inline uint64_t vm_get_stat(struct kvm_vm *vm, const char *stat_name)
>
>  void vm_create_irqchip(struct kvm_vm *vm);
>
> +static inline int __vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size,
> +                                          uint64_t flags)
> +{
> +        struct kvm_create_guest_memfd guest_memfd = {
> +                .size = size,
> +                .flags = flags,
> +        };
> +
> +        return __vm_ioctl(vm, KVM_CREATE_GUEST_MEMFD, &guest_memfd);
> +}
> +
> +static inline int vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size,
> +                                        uint64_t flags)
> +{
> +        int fd = __vm_create_guest_memfd(vm, size, flags);
> +
> +        TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_GUEST_MEMFD, fd));
> +        return fd;
> +}
> +
>  void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
>                                 uint64_t gpa, uint64_t size, void *hva);
>  int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
> @@ -439,6 +459,9 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
>                                  enum vm_mem_backing_src_type src_type,
>                                  uint64_t guest_paddr, uint32_t slot, uint64_t npages,
>                                  uint32_t flags);
> +void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
> +                uint64_t guest_paddr, uint32_t slot, uint64_t npages,
> +                uint32_t flags, int guest_memfd_fd, uint64_t guest_memfd_offset);
>
>  void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
>  void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
> diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
> index 7e614adc6cf4..7257f2243ab9 100644
> --- a/tools/testing/selftests/kvm/include/test_util.h
> +++ b/tools/testing/selftests/kvm/include/test_util.h
> @@ -142,6 +142,11 @@ static inline bool backing_src_is_shared(enum vm_mem_backing_src_type t)
>          return vm_mem_backing_src_alias(t)->flag & MAP_SHARED;
>  }
>
> +static inline bool backing_src_can_be_huge(enum vm_mem_backing_src_type t)
> +{
> +        return t != VM_MEM_SRC_ANONYMOUS && t != VM_MEM_SRC_SHMEM;
> +}
> +
>  /* Aligns x up to the next multiple of size. Size must be a power of 2. */
>  static inline uint64_t align_up(uint64_t x, uint64_t size)
>  {
> diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
> index 3676b37bea38..b63500fca627 100644
> --- a/tools/testing/selftests/kvm/lib/kvm_util.c
> +++ b/tools/testing/selftests/kvm/lib/kvm_util.c
> @@ -669,6 +669,8 @@ static void __vm_mem_region_delete(struct kvm_vm *vm,
>                  TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
>                  close(region->fd);
>          }
> +        if (region->region.guest_memfd >= 0)
> +                close(region->region.guest_memfd);
>
>          free(region);
>  }
> @@ -870,36 +872,15 @@ void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
>                     errno, strerror(errno));
>  }
>
> -/*
> - * VM Userspace Memory Region Add
> - *
> - * Input Args:
> - *   vm - Virtual Machine
> - *   src_type - Storage source for this region.
> - *              NULL to use anonymous memory.

"VM_MEM_SRC_ANONYMOUS to use anonymous memory"

> - *   guest_paddr - Starting guest physical address
> - *   slot - KVM region slot
> - *   npages - Number of physical pages
> - *   flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES)
> - *
> - * Output Args: None
> - *
> - * Return: None
> - *
> - * Allocates a memory area of the number of pages specified by npages
> - * and maps it to the VM specified by vm, at a starting physical address
> - * given by guest_paddr. The region is created with a KVM region slot
> - * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM. The
> - * region is created with the flags given by flags.
> - */
> -void vm_userspace_mem_region_add(struct kvm_vm *vm,
> -                                enum vm_mem_backing_src_type src_type,
> -                                uint64_t guest_paddr, uint32_t slot, uint64_t npages,
> -                                uint32_t flags)
> +/* FIXME: This thing needs to be ripped apart and rewritten. */

It sure does :)

With these nits:

Reviewed-by: Fuad Tabba <tabba@xxxxxxxxxx>
Tested-by: Fuad Tabba <tabba@xxxxxxxxxx>

Cheers,
/fuad

> +void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
> +                uint64_t guest_paddr, uint32_t slot, uint64_t npages,
> +                uint32_t flags, int guest_memfd, uint64_t guest_memfd_offset)
>  {
>          int ret;
>          struct userspace_mem_region *region;
>          size_t backing_src_pagesz = get_backing_src_pagesz(src_type);
> +        size_t mem_size = npages * vm->page_size;
>          size_t alignment;
>
>          TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages,
> @@ -952,7 +933,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
>          /* Allocate and initialize new mem region structure. */
>          region = calloc(1, sizeof(*region));
>          TEST_ASSERT(region != NULL, "Insufficient Memory");
> -        region->mmap_size = npages * vm->page_size;
> +        region->mmap_size = mem_size;
>
>  #ifdef __s390x__
>          /* On s390x, the host address must be aligned to 1M (due to PGSTEs) */
> @@ -999,14 +980,38 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
>          /* As needed perform madvise */
>          if ((src_type == VM_MEM_SRC_ANONYMOUS ||
>               src_type == VM_MEM_SRC_ANONYMOUS_THP) && thp_configured()) {
> -                ret = madvise(region->host_mem, npages * vm->page_size,
> +                ret = madvise(region->host_mem, mem_size,
>                                src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE);
>                  TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %s",
> -                            region->host_mem, npages * vm->page_size,
> +                            region->host_mem, mem_size,
>                              vm_mem_backing_src_alias(src_type)->name);
>          }
>
>          region->backing_src_type = src_type;
> +
> +        if (flags & KVM_MEM_GUEST_MEMFD) {
> +                if (guest_memfd < 0) {
> +                        uint32_t guest_memfd_flags = 0;
> +                        TEST_ASSERT(!guest_memfd_offset,
> +                                    "Offset must be zero when creating new guest_memfd");
> +                        guest_memfd = vm_create_guest_memfd(vm, mem_size, guest_memfd_flags);
> +                } else {
> +                        /*
> +                         * Install a unique fd for each memslot so that the fd
> +                         * can be closed when the region is deleted without
> +                         * needing to track if the fd is owned by the framework
> +                         * or by the caller.
> +                         */
> +                        guest_memfd = dup(guest_memfd);
> +                        TEST_ASSERT(guest_memfd >= 0, __KVM_SYSCALL_ERROR("dup()", guest_memfd));
> +                }
> +
> +                region->region.guest_memfd = guest_memfd;
> +                region->region.guest_memfd_offset = guest_memfd_offset;
> +        } else {
> +                region->region.guest_memfd = -1;
> +        }
> +
>          region->unused_phy_pages = sparsebit_alloc();
>          sparsebit_set_num(region->unused_phy_pages,
>                  guest_paddr >> vm->page_shift, npages);
> @@ -1019,9 +1024,10 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
>          TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n"
>                  "  rc: %i errno: %i\n"
>                  "  slot: %u flags: 0x%x\n"
> -                "  guest_phys_addr: 0x%lx size: 0x%lx",
> +                "  guest_phys_addr: 0x%lx size: 0x%lx guest_memfd: %d\n",
>                  ret, errno, slot, flags,
> -                guest_paddr, (uint64_t) region->region.memory_size);
> +                guest_paddr, (uint64_t) region->region.memory_size,
> +                region->region.guest_memfd);
>
>          /* Add to quick lookup data structures */
>          vm_userspace_mem_region_gpa_insert(&vm->regions.gpa_tree, region);
> @@ -1042,6 +1048,14 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
>          }
>  }
>
> +void vm_userspace_mem_region_add(struct kvm_vm *vm,
> +                                 enum vm_mem_backing_src_type src_type,
> +                                 uint64_t guest_paddr, uint32_t slot,
> +                                 uint64_t npages, uint32_t flags)
> +{
> +        vm_mem_add(vm, src_type, guest_paddr, slot, npages, flags, -1, 0);
> +}
> +
>  /*
>   * Memslot to region
>   *
> --
> 2.39.1
>
>
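
P.S. For anyone converting tests over, below is a rough, untested sketch of
how I read the new API being used: the simple path where vm_mem_add()
creates the guest_memfd itself, and the explicit fd+offset path backing two
memslots with one file. The slot numbers, GPAs, the 2M size and the helper
name are made up for illustration; only vm_mem_add(), vm_create_guest_memfd()
and the KVM_MEM_GUEST_MEMFD flag come from this patch.

#include <unistd.h>     /* close() */
#include "kvm_util.h"   /* vm_mem_add(), vm_create_guest_memfd() */

static void add_gmem_slots_example(struct kvm_vm *vm)
{
        const size_t size = 0x200000;   /* 2M per slot, arbitrary */

        /* Simple case: pass -1 and let vm_mem_add() create the guest_memfd. */
        vm_mem_add(vm, VM_MEM_SRC_ANONYMOUS, 0x10000000, 1,
                   size / vm->page_size, KVM_MEM_GUEST_MEMFD, -1, 0);

        /* Fancier case: back two memslots with a single guest_memfd. */
        int fd = vm_create_guest_memfd(vm, 2 * size, 0);

        /* vm_mem_add() dup()s the fd, so each region owns its own copy. */
        vm_mem_add(vm, VM_MEM_SRC_ANONYMOUS, 0x20000000, 2,
                   size / vm->page_size, KVM_MEM_GUEST_MEMFD, fd, 0);
        vm_mem_add(vm, VM_MEM_SRC_ANONYMOUS, 0x20000000 + size, 3,
                   size / vm->page_size, KVM_MEM_GUEST_MEMFD, fd, size);

        /* Safe to close our copy without destroying the memslots first. */
        close(fd);
}

If I've misread the intended use of guest_memfd_offset, please shout.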