On Wed, Mar 20, 2024 at 10:59 AM Paolo Bonzini <pbonzini@xxxxxxxxxx> wrote:
> I will now focus on reviewing patches 6-20. This way we can prepare a
> common tree for SEV_INIT2/SNP/TDX, for both vendors to build upon.

OK, the attachment is the delta that I have. The only major change is that guest_memfd-backed RAM blocks now require RAM block discard (thus effectively blocking VFIO support for SEV-SNP/TDX, at least for now).

I will push it shortly to the same sevinit2 branch, and will post the patches sometime soon.

Xiaoyao, you can use that branch too (it's on https://gitlab.com/bonzini/qemu) as the basis for your TDX work.

Paolo
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index bf0ae0c8adb..428468950d9 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -285,19 +285,8 @@ static int kvm_set_user_memory_region(KVMMemoryListener *kml, KVMSlot *slot, boo { KVMState *s = kvm_state; struct kvm_userspace_memory_region2 mem; - static int cap_user_memory2 = -1; int ret; - if (cap_user_memory2 == -1) { - cap_user_memory2 = kvm_check_extension(s, KVM_CAP_USER_MEMORY2); - } - - if (!cap_user_memory2 && slot->guest_memfd >= 0) { - error_report("%s, KVM doesn't support KVM_CAP_USER_MEMORY2," - " which is required by guest memfd!", __func__); - exit(1); - } - mem.slot = slot->slot | (kml->as_id << 16); mem.guest_phys_addr = slot->start_addr; mem.userspace_addr = (unsigned long)slot->ram; @@ -310,7 +299,7 @@ static int kvm_set_user_memory_region(KVMMemoryListener *kml, KVMSlot *slot, boo * value. This is needed based on KVM commit 75d61fbc. */ mem.memory_size = 0; - if (cap_user_memory2) { + if (kvm_guest_memfd_supported) { ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION2, &mem); } else { ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); @@ -320,7 +309,7 @@ static int kvm_set_user_memory_region(KVMMemoryListener *kml, KVMSlot *slot, boo } } mem.memory_size = slot->memory_size; - if (cap_user_memory2) { + if (kvm_guest_memfd_supported) { ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION2, &mem); } else { ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); @@ -332,7 +321,7 @@ err: mem.userspace_addr, mem.guest_memfd, mem.guest_memfd_offset, ret); if (ret < 0) { - if (cap_user_memory2) { + if (kvm_guest_memfd_supported) { error_report("%s: KVM_SET_USER_MEMORY_REGION2 failed, slot=%d," " start=0x%" PRIx64 ", size=0x%" PRIx64 "," " flags=0x%" PRIx32 ", guest_memfd=%" PRId32 "," @@ -502,6 +491,7 @@ static int kvm_mem_flags(MemoryRegion *mr) flags |= KVM_MEM_READONLY; } if (memory_region_has_guest_memfd(mr)) { + assert(kvm_guest_memfd_supported); flags |= KVM_MEM_GUEST_MEMFD; } 
return flags; @@ -1310,18 +1300,7 @@ static int kvm_set_memory_attributes(hwaddr start, hwaddr size, uint64_t attr) struct kvm_memory_attributes attrs; int r; - if (kvm_supported_memory_attributes == 0) { - error_report("No memory attribute supported by KVM\n"); - return -EINVAL; - } - - if ((attr & kvm_supported_memory_attributes) != attr) { - error_report("memory attribute 0x%lx not supported by KVM," - " supported bits are 0x%lx\n", - attr, kvm_supported_memory_attributes); - return -EINVAL; - } - + assert((attr & kvm_supported_memory_attributes) == attr); attrs.attributes = attr; attrs.address = start; attrs.size = size; @@ -2488,11 +2467,14 @@ static int kvm_init(MachineState *ms) } s->as = g_new0(struct KVMAs, s->nr_as); - kvm_guest_memfd_supported = kvm_check_extension(s, KVM_CAP_GUEST_MEMFD); - ret = kvm_check_extension(s, KVM_CAP_MEMORY_ATTRIBUTES); kvm_supported_memory_attributes = ret > 0 ? ret : 0; + kvm_guest_memfd_supported = + kvm_check_extension(s, KVM_CAP_GUEST_MEMFD) && + kvm_check_extension(s, KVM_CAP_USER_MEMORY2) && + (kvm_supported_memory_attributes & KVM_MEMORY_ATTRIBUTE_PRIVATE); + if (object_property_find(OBJECT(current_machine), "kvm-type")) { g_autofree char *kvm_type = object_property_get_str(OBJECT(current_machine), "kvm-type", @@ -2962,14 +2944,10 @@ int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private) */ return 0; } else { - ret = ram_block_discard_is_disabled() - ? ram_block_discard_range(rb, offset, size) - : 0; + ret = ram_block_discard_range(rb, offset, size); } } else { - ret = ram_block_discard_is_disabled() - ? 
ram_block_discard_guest_memfd_range(rb, offset, size) - : 0; + ret = ram_block_discard_guest_memfd_range(rb, offset, size); } } else { error_report("Convert non guest_memfd backed memory region " diff --git a/system/physmem.c b/system/physmem.c index 8be8053cf77..b839be58538 100644 --- a/system/physmem.c +++ b/system/physmem.c @@ -1810,6 +1810,7 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) const bool shared = qemu_ram_is_shared(new_block); RAMBlock *block; RAMBlock *last_block = NULL; + bool free_on_error = false; ram_addr_t old_ram_size, new_ram_size; Error *err = NULL; @@ -1839,17 +1840,26 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) return; } memory_try_enable_merging(new_block->host, new_block->max_length); + free_on_error = true; } } - if (kvm_enabled() && (new_block->flags & RAM_GUEST_MEMFD)) { + if (new_block->flags & RAM_GUEST_MEMFD) { + assert(kvm_enabled()); assert(new_block->guest_memfd < 0); + if (ram_block_discard_require(true) < 0) { + error_setg_errno(errp, errno, + "cannot set up private guest memory: discard currently blocked"); + error_append_hint(errp, "Are you using assigned devices?\n"); + goto out_free; + } + new_block->guest_memfd = kvm_create_guest_memfd(new_block->max_length, 0, errp); if (new_block->guest_memfd < 0) { qemu_mutex_unlock_ramlist(); - return; + goto out_free; } } @@ -1901,6 +1911,13 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) ram_block_notify_add(new_block->host, new_block->used_length, new_block->max_length); } + return; + +out_free: + if (free_on_error) { + qemu_anon_ram_free(new_block->host, new_block->max_length); + new_block->host = NULL; + } } #ifdef CONFIG_POSIX @@ -2032,7 +2049,7 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size, int align; assert((ram_flags & ~(RAM_SHARED | RAM_RESIZEABLE | RAM_PREALLOC | - RAM_NORESERVE| RAM_GUEST_MEMFD)) == 0); + RAM_NORESERVE | RAM_GUEST_MEMFD)) == 0); assert(!host ^ (ram_flags & RAM_PREALLOC)); 
align = qemu_real_host_page_size(); @@ -2101,6 +2118,7 @@ static void reclaim_ramblock(RAMBlock *block) if (block->guest_memfd >= 0) { close(block->guest_memfd); + ram_block_discard_require(false); } g_free(block);