On 11/15/2023 6:42 PM, Daniel P. Berrangé wrote:
On Wed, Nov 15, 2023 at 02:14:19AM -0500, Xiaoyao Li wrote:
From: Chao Peng <chao.p.peng@xxxxxxxxxxxxxxx>
Currently only KVM_MEMORY_EXIT_FLAG_PRIVATE in flags is valid when
KVM_EXIT_MEMORY_FAULT happens. It indicates that userspace needs to do
the memory conversion on the RAMBlock to turn the memory into the desired
attribute, i.e., private or shared.
Note, KVM_EXIT_MEMORY_FAULT makes sense only when the RAMBlock has a
guest_memfd memory backend.
Note, KVM_RUN returns -EFAULT when it exits with KVM_EXIT_MEMORY_FAULT, so
special handling is added to keep that case from being treated as a failed
run.
Signed-off-by: Chao Peng <chao.p.peng@xxxxxxxxxxxxxxx>
Co-developed-by: Xiaoyao Li <xiaoyao.li@xxxxxxxxx>
Signed-off-by: Xiaoyao Li <xiaoyao.li@xxxxxxxxx>
---
accel/kvm/kvm-all.c | 76 +++++++++++++++++++++++++++++++++++++++------
1 file changed, 66 insertions(+), 10 deletions(-)
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 76e2404d54d2..58abbcb6926e 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -2902,6 +2902,50 @@ static void kvm_eat_signals(CPUState *cpu)
} while (sigismember(&chkset, SIG_IPI));
}
+static int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private)
+{
+ MemoryRegionSection section;
+ ram_addr_t offset;
+ RAMBlock *rb;
+ void *addr;
+ int ret = -1;
+
+ section = memory_region_find(get_system_memory(), start, size);
+ if (!section.mr) {
+ return ret;
+ }
+
+ if (memory_region_has_guest_memfd(section.mr)) {
+ if (to_private) {
+ ret = kvm_set_memory_attributes_private(start, size);
+ } else {
+ ret = kvm_set_memory_attributes_shared(start, size);
+ }
+
+ if (ret) {
+ memory_region_unref(section.mr);
+ return ret;
+ }
+
+ addr = memory_region_get_ram_ptr(section.mr) +
+ section.offset_within_region;
+ rb = qemu_ram_block_from_host(addr, false, &offset);
+ /*
+ * With KVM_SET_MEMORY_ATTRIBUTES by kvm_set_memory_attributes(),
+ * operation on underlying file descriptor is only for releasing
+ * unnecessary pages.
+ */
+ ram_block_convert_range(rb, offset, size, to_private);
+ } else {
+ warn_report("Convert non guest_memfd backed memory region "
+ "(0x%"HWADDR_PRIx" ,+ 0x%"HWADDR_PRIx") to %s",
+ start, size, to_private ? "private" : "shared");
Again, if you're returning '-1' to indicate error, then
using warn_report is wrong, it should be error_report.
warn_report is for when you return success, indicating
the problem was non-fatal.
Learned.
Thanks!
+ }
+
+ memory_region_unref(section.mr);
+ return ret;
+}
+
int kvm_cpu_exec(CPUState *cpu)
{
struct kvm_run *run = cpu->kvm_run;
@@ -2969,18 +3013,20 @@ int kvm_cpu_exec(CPUState *cpu)
ret = EXCP_INTERRUPT;
break;
}
- fprintf(stderr, "error: kvm run failed %s\n",
- strerror(-run_ret));
+ if (!(run_ret == -EFAULT && run->exit_reason == KVM_EXIT_MEMORY_FAULT)) {
+ fprintf(stderr, "error: kvm run failed %s\n",
+ strerror(-run_ret));
#ifdef TARGET_PPC
- if (run_ret == -EBUSY) {
- fprintf(stderr,
- "This is probably because your SMT is enabled.\n"
- "VCPU can only run on primary threads with all "
- "secondary threads offline.\n");
- }
+ if (run_ret == -EBUSY) {
+ fprintf(stderr,
+ "This is probably because your SMT is enabled.\n"
+ "VCPU can only run on primary threads with all "
+ "secondary threads offline.\n");
+ }
#endif
- ret = -1;
- break;
+ ret = -1;
+ break;
+ }
}
trace_kvm_run_exit(cpu->cpu_index, run->exit_reason);
@@ -3067,6 +3113,16 @@ int kvm_cpu_exec(CPUState *cpu)
break;
}
break;
+ case KVM_EXIT_MEMORY_FAULT:
+ if (run->memory_fault.flags & ~KVM_MEMORY_EXIT_FLAG_PRIVATE) {
+ error_report("KVM_EXIT_MEMORY_FAULT: Unknown flag 0x%" PRIx64,
+ (uint64_t)run->memory_fault.flags);
+ ret = -1;
+ break;
+ }
+ ret = kvm_convert_memory(run->memory_fault.gpa, run->memory_fault.size,
+ run->memory_fault.flags & KVM_MEMORY_EXIT_FLAG_PRIVATE);
+ break;
default:
DPRINTF("kvm_arch_handle_exit\n");
ret = kvm_arch_handle_exit(cpu, run);
--
2.34.1
With regards,
Daniel