On Wed, Nov 15, 2023 at 02:14:11AM -0500, Xiaoyao Li wrote: > Add KVM guest_memfd support to RAMBlock so both normal hva based memory > and kvm guest memfd based private memory can be associated in one RAMBlock. > > Introduce new flag RAM_GUEST_MEMFD. When it's set, it calls KVM ioctl to > create private guest_memfd during RAMBlock setup. > > Note, RAM_GUEST_MEMFD is supposed to be set for memory backends of > confidential guests, such as TDX VM. How and when to set it for memory > backends will be implemented in the following patches. > > Introduce memory_region_has_guest_memfd() to query if the MemoryRegion has > KVM guest_memfd allocated. > > Signed-off-by: Xiaoyao Li <xiaoyao.li@xxxxxxxxx> > --- > Changes in v3: > - rename gmem to guest_memfd; > - close(guest_memfd) when RAMBlock is released; (Daniel P. Berrangé) > - Suqash the patch that introduces memory_region_has_guest_memfd(). > --- > accel/kvm/kvm-all.c | 24 ++++++++++++++++++++++++ > include/exec/memory.h | 13 +++++++++++++ > include/exec/ramblock.h | 1 + > include/sysemu/kvm.h | 2 ++ > system/memory.c | 5 +++++ > system/physmem.c | 27 ++++++++++++++++++++++++--- > 6 files changed, 69 insertions(+), 3 deletions(-) > > diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c > index c1b40e873531..9f751d4971f8 100644 > --- a/accel/kvm/kvm-all.c > +++ b/accel/kvm/kvm-all.c > @@ -101,6 +101,7 @@ bool kvm_msi_use_devid; > bool kvm_has_guest_debug; > static int kvm_sstep_flags; > static bool kvm_immediate_exit; > +static bool kvm_guest_memfd_supported; > static hwaddr kvm_max_slot_size = ~0; > > static const KVMCapabilityInfo kvm_required_capabilites[] = { > @@ -2397,6 +2398,8 @@ static int kvm_init(MachineState *ms) > } > s->as = g_new0(struct KVMAs, s->nr_as); > > + kvm_guest_memfd_supported = kvm_check_extension(s, KVM_CAP_GUEST_MEMFD); > + > if (object_property_find(OBJECT(current_machine), "kvm-type")) { > g_autofree char *kvm_type = object_property_get_str(OBJECT(current_machine), > "kvm-type", > @@ -4078,3 +4081,24 @@ void query_stats_schemas_cb(StatsSchemaList **result, Error **errp) > query_stats_schema_vcpu(first_cpu, &stats_args); > } > } > + > +int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp) > +{ > + int fd; > + struct kvm_create_guest_memfd guest_memfd = { > + .size = size, > + .flags = flags, > + }; > + > + if (!kvm_guest_memfd_supported) { > + error_setg(errp, "KVM doesn't support guest memfd\n"); > + return -EOPNOTSUPP; Returning an errno value is unusual when we have an 'Error **errp' parameter for reporting, and the following codepath merely returns -1, so this is inconsistent. Just return -1 here too. > + } > + > + fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_GUEST_MEMFD, &guest_memfd); > + if (fd < 0) { > + error_setg_errno(errp, errno, "%s: error creating kvm guest memfd\n", __func__); I'd prefer an explicit 'return -1' here, even though 'fd' is technically going to be -1 already. Also including __func__ in the error message is not really needed IMHO > + } > + > + return fd; > +} > diff --git a/include/exec/memory.h b/include/exec/memory.h > index 831f7c996d9d..f780367ab1bd 100644 > --- a/include/exec/memory.h > +++ b/include/exec/memory.h > @@ -243,6 +243,9 @@ typedef struct IOMMUTLBEvent { > /* RAM FD is opened read-only */ > #define RAM_READONLY_FD (1 << 11) > > +/* RAM can be private that has kvm gmem backend */ > +#define RAM_GUEST_MEMFD (1 << 12) > + > static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn, > IOMMUNotifierFlag flags, > hwaddr start, hwaddr end, > @@ -1702,6 +1705,16 @@ static inline bool memory_region_is_romd(MemoryRegion *mr) > */ > bool memory_region_is_protected(MemoryRegion *mr); > > +/** > + * memory_region_has_guest_memfd: check whether a memory region has guest_memfd > + * associated > + * > + * Returns %true if a memory region's ram_block has valid guest_memfd assigned. > + * > + * @mr: the memory region being queried > + */ > +bool memory_region_has_guest_memfd(MemoryRegion *mr); > + > /** > * memory_region_get_iommu: check whether a memory region is an iommu > * > diff --git a/include/exec/ramblock.h b/include/exec/ramblock.h > index 69c6a5390293..0a17ba882729 100644 > --- a/include/exec/ramblock.h > +++ b/include/exec/ramblock.h > @@ -41,6 +41,7 @@ struct RAMBlock { > QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers; > int fd; > uint64_t fd_offset; > + int guest_memfd; > size_t page_size; > /* dirty bitmap used during migration */ > unsigned long *bmap; > diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h > index d61487816421..fedc28c7d17f 100644 > --- a/include/sysemu/kvm.h > +++ b/include/sysemu/kvm.h > @@ -538,4 +538,6 @@ bool kvm_arch_cpu_check_are_resettable(void); > bool kvm_dirty_ring_enabled(void); > > uint32_t kvm_dirty_ring_size(void); > + > +int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp); > #endif > diff --git a/system/memory.c b/system/memory.c > index 304fa843ea12..69741d91bbb7 100644 > --- a/system/memory.c > +++ b/system/memory.c > @@ -1862,6 +1862,11 @@ bool memory_region_is_protected(MemoryRegion *mr) > return mr->ram && (mr->ram_block->flags & RAM_PROTECTED); > } > > +bool memory_region_has_guest_memfd(MemoryRegion *mr) > +{ > + return mr->ram_block && mr->ram_block->guest_memfd >= 0; > +} > + > uint8_t memory_region_get_dirty_log_mask(MemoryRegion *mr) > { > uint8_t mask = mr->dirty_log_mask; > diff --git a/system/physmem.c b/system/physmem.c > index fc2b0fee0188..0af2213cbd9c 100644 > --- a/system/physmem.c > +++ b/system/physmem.c > @@ -1841,6 +1841,20 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) > } > } > > +#ifdef CONFIG_KVM > + if (kvm_enabled() && new_block->flags & RAM_GUEST_MEMFD && > + new_block->guest_memfd < 0) { > + /* TODO: to decide if KVM_GUEST_MEMFD_ALLOW_HUGEPAGE is supported */ > + uint64_t flags = 0; > + new_block->guest_memfd = kvm_create_guest_memfd(new_block->max_length, > + flags, errp); > + if (new_block->guest_memfd < 0) { > + qemu_mutex_unlock_ramlist(); > + return; > + } > + } > +#endif > + > new_ram_size = MAX(old_ram_size, > (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS); > if (new_ram_size > old_ram_size) { > @@ -1903,7 +1917,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, > /* Just support these ram flags by now. */ > assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_NORESERVE | > RAM_PROTECTED | RAM_NAMED_FILE | RAM_READONLY | > - RAM_READONLY_FD)) == 0); > + RAM_READONLY_FD | RAM_GUEST_MEMFD)) == 0); > > if (xen_enabled()) { > error_setg(errp, "-mem-path not supported with Xen"); > @@ -1938,6 +1952,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, > new_block->used_length = size; > new_block->max_length = size; > new_block->flags = ram_flags; > + new_block->guest_memfd = -1; > new_block->host = file_ram_alloc(new_block, size, fd, !file_size, offset, > errp); > if (!new_block->host) { > @@ -2016,7 +2031,7 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size, > Error *local_err = NULL; > > assert((ram_flags & ~(RAM_SHARED | RAM_RESIZEABLE | RAM_PREALLOC | > - RAM_NORESERVE)) == 0); > + RAM_NORESERVE| RAM_GUEST_MEMFD)) == 0); > assert(!host ^ (ram_flags & RAM_PREALLOC)); > > size = HOST_PAGE_ALIGN(size); > @@ -2028,6 +2043,7 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size, > new_block->max_length = max_size; > assert(max_size >= size); > new_block->fd = -1; > + new_block->guest_memfd = -1; > new_block->page_size = qemu_real_host_page_size(); > new_block->host = host; > new_block->flags = ram_flags; > @@ -2050,7 +2066,7 @@ RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, > RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags, > MemoryRegion *mr, Error **errp) > { > - assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE)) == 0); > + assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE | RAM_GUEST_MEMFD)) == 0); > return qemu_ram_alloc_internal(size, size, NULL, NULL, ram_flags, mr, errp); > } > > @@ -2078,6 +2094,11 @@ static void reclaim_ramblock(RAMBlock *block) > } else { > qemu_anon_ram_free(block->host, block->max_length); > } > + > + if (block->guest_memfd >= 0) { > + close(block->guest_memfd); > + } > + > g_free(block); > } > > -- > 2.34.1 > With regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|