On 02/12/2022 17:44, Fuad Tabba wrote: > If specified by the option and supported by KVM, allocate guest > memory as restricted with the new system call. > > Signed-off-by: Fuad Tabba <tabba@xxxxxxxxxx> > --- > arm/aarch64/pvtime.c | 2 +- > hw/vesa.c | 2 +- > include/kvm/util.h | 2 +- > util/util.c | 12 ++++++++---- > 4 files changed, 11 insertions(+), 7 deletions(-) > > diff --git a/arm/aarch64/pvtime.c b/arm/aarch64/pvtime.c > index a452938..8247c52 100644 > --- a/arm/aarch64/pvtime.c > +++ b/arm/aarch64/pvtime.c > @@ -16,7 +16,7 @@ static int pvtime__alloc_region(struct kvm *kvm) > int mem_fd; > int ret = 0; > > - mem_fd = memfd_alloc(ARM_PVTIME_SIZE, false, 0); > + mem_fd = memfd_alloc(kvm, ARM_PVTIME_SIZE, false, 0); > if (mem_fd < 0) > return -errno; > > diff --git a/hw/vesa.c b/hw/vesa.c > index 3233794..6c5287a 100644 > --- a/hw/vesa.c > +++ b/hw/vesa.c > @@ -90,7 +90,7 @@ struct framebuffer *vesa__init(struct kvm *kvm) > if (r < 0) > goto unregister_ioport; > > - mem_fd = memfd_alloc(ARM_PVTIME_SIZE, false, 0, 0); > + mem_fd = memfd_alloc(kvm, ARM_PVTIME_SIZE, false, 0, 0); > if (mem_fd < 0) { > r = -errno; > goto unregister_device; > diff --git a/include/kvm/util.h b/include/kvm/util.h > index 79275ed..5a98d4a 100644 > --- a/include/kvm/util.h > +++ b/include/kvm/util.h > @@ -139,7 +139,7 @@ static inline int pow2_size(unsigned long x) > } > > struct kvm; > -int memfd_alloc(u64 size, bool hugetlb, u64 blk_size); > +int memfd_alloc(struct kvm *kvm, size_t size, bool hugetlb, u64 hugepage_size); > void *mmap_anon_or_hugetlbfs_align(struct kvm *kvm, const char *hugetlbfs_path, > u64 size, u64 align); > void *mmap_anon_or_hugetlbfs(struct kvm *kvm, const char *hugetlbfs_path, u64 size); > diff --git a/util/util.c b/util/util.c > index 107f34d..13b3e82 100644 > --- a/util/util.c > +++ b/util/util.c > @@ -17,7 +17,7 @@ > __SYSCALL(__NR_memfd_restricted, sys_memfd_restricted) > #endif > > -static inline int memfd_restricted(unsigned int flags) > +static int 
memfd_restricted(unsigned int flags) > { > return syscall(__NR_memfd_restricted, flags); > } > @@ -106,7 +106,7 @@ static u64 get_hugepage_blk_size(const char *hugetlbfs_path) > return sfs.f_bsize; > } > > -int memfd_alloc(u64 size, bool hugetlb, u64 blk_size) > +int memfd_alloc(struct kvm *kvm, size_t size, bool hugetlb, u64 blk_size) > { > const char *name = "kvmtool"; > unsigned int flags = 0; > @@ -120,7 +120,11 @@ int memfd_alloc(u64 size, bool hugetlb, u64 blk_size) > flags |= blk_size << MFD_HUGE_SHIFT; > } > > - fd = memfd_create(name, flags); > + if (kvm->cfg.restricted_mem) > + fd = memfd_restricted(flags); > + else > + fd = memfd_create(name, flags); > + > if (fd < 0) > die_perror("Can't memfd_create for memory map"); > > @@ -167,7 +171,7 @@ void *mmap_anon_or_hugetlbfs_align(struct kvm *kvm, const char *hugetlbfs_path, > if (addr_map == MAP_FAILED) > return MAP_FAILED; > > - fd = memfd_alloc(size, hugetlbfs_path, blk_size); > + fd = memfd_alloc(kvm, size, hugetlbfs_path, blk_size); > if (fd < 0) > return MAP_FAILED; > Extra context: > /* Map the allocated memory in the fd to the specified alignment. */ > addr_align = (void *)ALIGN((u64)addr_map, align_sz); > if (mmap(addr_align, size, PROT_RW, MAP_SHARED | MAP_FIXED, fd, 0) == > MAP_FAILED) { > close(fd); > return MAP_FAILED; > } So I don't understand how this works. My understanding is that memfd_restricted() returns a file descriptor that cannot be mapped in user space. So surely this mmap() will always fail whenever kvm->cfg.restricted_mem is set? What am I missing? Thanks, Steve