From: Chao Peng <chao.p.peng@xxxxxxxxxxxxxxx>
Switch to KVM_SET_USER_MEMORY_REGION2 when supported by KVM.
With KVM_SET_USER_MEMORY_REGION2, QEMU can set up a memory region that
is backed by both hva-based shared memory and guest memfd-based private
memory.
Signed-off-by: Chao Peng <chao.p.peng@xxxxxxxxxxxxxxx>
Co-developed-by: Xiaoyao Li <xiaoyao.li@xxxxxxxxx>
Signed-off-by: Xiaoyao Li <xiaoyao.li@xxxxxxxxx>
---
accel/kvm/kvm-all.c | 56 ++++++++++++++++++++++++++++++++++------
accel/kvm/trace-events | 2 +-
include/sysemu/kvm_int.h | 2 ++
3 files changed, 51 insertions(+), 9 deletions(-)
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 9f751d4971f8..69afeb47c9c0 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -293,35 +293,69 @@ int kvm_physical_memory_addr_from_host(KVMState *s, void *ram,
static int kvm_set_user_memory_region(KVMMemoryListener *kml, KVMSlot *slot, bool new)
{
KVMState *s = kvm_state;
- struct kvm_userspace_memory_region mem;
+ struct kvm_userspace_memory_region2 mem;
+ static int cap_user_memory2 = -1;
int ret;
+ if (cap_user_memory2 == -1) {
+ cap_user_memory2 = kvm_check_extension(s, KVM_CAP_USER_MEMORY2);
+ }
+
+ if (!cap_user_memory2 && slot->guest_memfd >= 0) {
+ error_report("%s, KVM doesn't support KVM_CAP_USER_MEMORY2,"
+ " which is required by guest memfd!", __func__);
+ exit(1);
+ }
+
mem.slot = slot->slot | (kml->as_id << 16);
mem.guest_phys_addr = slot->start_addr;
mem.userspace_addr = (unsigned long)slot->ram;
mem.flags = slot->flags;
+ mem.guest_memfd = slot->guest_memfd;
+ mem.guest_memfd_offset = slot->guest_memfd_offset;
if (slot->memory_size && !new && (mem.flags ^ slot->old_flags) & KVM_MEM_READONLY) {
/* Set the slot size to 0 before setting the slot to the desired
* value. This is needed based on KVM commit 75d61fbc. */
mem.memory_size = 0;
- ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
+
+ if (cap_user_memory2) {
+ ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION2, &mem);
+ } else {
+ ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
+ }
if (ret < 0) {
goto err;
}
}
mem.memory_size = slot->memory_size;
- ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
+ if (cap_user_memory2) {
+ ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION2, &mem);
+ } else {
+ ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
+ }
slot->old_flags = mem.flags;
err:
trace_kvm_set_user_memory(mem.slot >> 16, (uint16_t)mem.slot, mem.flags,
mem.guest_phys_addr, mem.memory_size,
- mem.userspace_addr, ret);
+ mem.userspace_addr, mem.guest_memfd,
+ mem.guest_memfd_offset, ret);
if (ret < 0) {
- error_report("%s: KVM_SET_USER_MEMORY_REGION failed, slot=%d,"
- " start=0x%" PRIx64 ", size=0x%" PRIx64 ": %s",
- __func__, mem.slot, slot->start_addr,
- (uint64_t)mem.memory_size, strerror(errno));
+ if (cap_user_memory2) {
+ error_report("%s: KVM_SET_USER_MEMORY_REGION2 failed, slot=%d,"
+ " start=0x%" PRIx64 ", size=0x%" PRIx64 ","
+ " flags=0x%" PRIx32 ", guest_memfd=%" PRId32 ","
+ " guest_memfd_offset=0x%" PRIx64 ": %s",
+ __func__, mem.slot, slot->start_addr,
+ (uint64_t)mem.memory_size, mem.flags,
+ mem.guest_memfd, (uint64_t)mem.guest_memfd_offset,
+ strerror(errno));
+ } else {
+ error_report("%s: KVM_SET_USER_MEMORY_REGION failed, slot=%d,"
+ " start=0x%" PRIx64 ", size=0x%" PRIx64 ": %s",
+ __func__, mem.slot, slot->start_addr,
+ (uint64_t)mem.memory_size, strerror(errno));
+ }
}
return ret;
}
@@ -477,6 +511,9 @@ static int kvm_mem_flags(MemoryRegion *mr)
if (readonly && kvm_readonly_mem_allowed) {
flags |= KVM_MEM_READONLY;
}
+ if (memory_region_has_guest_memfd(mr)) {
+ flags |= KVM_MEM_PRIVATE;
+ }