[WIP Patch v2 09/14] KVM: Introduce KVM_CAP_MEMORY_FAULT_NOWAIT without implementation

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Add documentation, memslot flags, useful helper functions, and the
actual new capability itself.

Memory fault exits on absent mappings are particularly useful for
userfaultfd-based live migration postcopy. When many vCPUs fault upon a
single userfaultfd the faults can take a while to surface to userspace
due to having to contend for uffd wait queue locks. Bypassing the uffd
entirely by triggering a vCPU exit avoids this contention and can improve
the fault rate by as much as 10x.
---
 Documentation/virt/kvm/api.rst | 37 +++++++++++++++++++++++++++++++---
 include/linux/kvm_host.h       |  6 ++++++
 include/uapi/linux/kvm.h       |  3 +++
 tools/include/uapi/linux/kvm.h |  2 ++
 virt/kvm/kvm_main.c            |  7 ++++++-
 5 files changed, 51 insertions(+), 4 deletions(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index f9ca18bbec879..4932c0f62eb3d 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -1312,6 +1312,7 @@ yet and must be cleared on entry.
   /* for kvm_userspace_memory_region::flags */
   #define KVM_MEM_LOG_DIRTY_PAGES	(1UL << 0)
   #define KVM_MEM_READONLY	(1UL << 1)
+  #define KVM_MEM_ABSENT_MAPPING_FAULT (1UL << 2)
 
 This ioctl allows the user to create, modify or delete a guest physical
 memory slot.  Bits 0-15 of "slot" specify the slot id and this value
@@ -1342,12 +1343,15 @@ It is recommended that the lower 21 bits of guest_phys_addr and userspace_addr
 be identical.  This allows large pages in the guest to be backed by large
 pages in the host.
 
-The flags field supports two flags: KVM_MEM_LOG_DIRTY_PAGES and
-KVM_MEM_READONLY.  The former can be set to instruct KVM to keep track of
+The flags field supports three flags:
+
+1.  KVM_MEM_LOG_DIRTY_PAGES: can be set to instruct KVM to keep track of
 writes to memory within the slot.  See KVM_GET_DIRTY_LOG ioctl to know how to
-use it.  The latter can be set, if KVM_CAP_READONLY_MEM capability allows it,
+use it.
+2.  KVM_MEM_READONLY: can be set, if KVM_CAP_READONLY_MEM capability allows it,
 to make a new slot read-only.  In this case, writes to this memory will be
 posted to userspace as KVM_EXIT_MMIO exits.
+3.  KVM_MEM_ABSENT_MAPPING_FAULT: see KVM_CAP_MEMORY_FAULT_NOWAIT for details.
 
 When the KVM_CAP_SYNC_MMU capability is available, changes in the backing of
 the memory region are automatically reflected into the guest.  For example, an
@@ -7702,10 +7706,37 @@ Through args[0], the capability can be set on a per-exit-reason basis.
 Currently, the only exit reasons supported are
 
 1. KVM_MEMFAULT_REASON_UNKNOWN (1 << 0)
+2. KVM_MEMFAULT_REASON_ABSENT_MAPPING (1 << 1)
 
 Memory fault exits with a reason of UNKNOWN should not be depended upon: they
 may be added, removed, or reclassified under a stable reason.
 
+7.35 KVM_CAP_MEMORY_FAULT_NOWAIT
+--------------------------------
+
+:Architectures: x86, arm64
+:Returns: -EINVAL.
+
+The presence of this capability indicates that userspace may pass the
+KVM_MEM_ABSENT_MAPPING_FAULT flag to KVM_SET_USER_MEMORY_REGION to cause KVM_RUN
+to populate 'kvm_run.memory_fault' and exit to userspace (*) in response to
+page faults for which the userspace page tables do not contain present
+mappings. Attempting to enable the capability directly will fail.
+
+The 'gpa' and 'len' fields of kvm_run.memory_fault will be set to the starting
+address and length (in bytes) of the faulting page. 'flags' will be set to
+KVM_MEMFAULT_REASON_ABSENT_MAPPING.
+
+Userspace should determine how best to make the mapping present, then take
+appropriate action. For instance, in the case of absent mappings this might
+involve establishing the mapping for the first time via UFFDIO_COPY/CONTINUE or
+faulting the mapping in using MADV_POPULATE_READ/WRITE. After establishing the
+mapping, userspace can return to KVM to retry the previous memory access.
+
+(*) NOTE: On x86, KVM_CAP_X86_MEMORY_FAULT_EXIT must be enabled for the
+KVM_MEMFAULT_REASON_ABSENT_MAPPING reason: otherwise userspace will only receive
+a -EFAULT from KVM_RUN without any useful information.
+
 8. Other capabilities.
 ======================
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index d3ccfead73e42..c28330f25526f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -593,6 +593,12 @@ static inline bool kvm_slot_dirty_track_enabled(const struct kvm_memory_slot *sl
 	return slot->flags & KVM_MEM_LOG_DIRTY_PAGES;
 }
 
+/* True if KVM_MEM_ABSENT_MAPPING_FAULT is set in @slot's flags. */
+static inline bool kvm_slot_fault_on_absent_mapping(const struct kvm_memory_slot *slot)
+{
+	return !!(slot->flags & KVM_MEM_ABSENT_MAPPING_FAULT);
+}
+
 static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot)
 {
 	return ALIGN(memslot->npages, BITS_PER_LONG) / 8;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 0ba1d7f01346e..2146b27cdd61a 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -102,6 +102,7 @@ struct kvm_userspace_memory_region {
  */
 #define KVM_MEM_LOG_DIRTY_PAGES	(1UL << 0)
 #define KVM_MEM_READONLY	(1UL << 1)
+#define KVM_MEM_ABSENT_MAPPING_FAULT	(1UL << 2)
 
 /* for KVM_IRQ_LINE */
 struct kvm_irq_level {
@@ -1197,6 +1198,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP 225
 #define KVM_CAP_PMU_EVENT_MASKED_EVENTS 226
 #define KVM_CAP_X86_MEMORY_FAULT_EXIT 227
+#define KVM_CAP_MEMORY_FAULT_NOWAIT 228
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -2252,5 +2254,6 @@ struct kvm_s390_zpci_op {
 
 /* Exit reasons for KVM_EXIT_MEMORY_FAULT */
 #define KVM_MEMFAULT_REASON_UNKNOWN (1 << 0)
+#define KVM_MEMFAULT_REASON_ABSENT_MAPPING (1 << 1)
 
 #endif /* __LINUX_KVM_H */
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 2b468345f25c3..1a1707d9f442a 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -102,6 +102,7 @@ struct kvm_userspace_memory_region {
  */
 #define KVM_MEM_LOG_DIRTY_PAGES	(1UL << 0)
 #define KVM_MEM_READONLY	(1UL << 1)
+#define KVM_MEM_ABSENT_MAPPING_FAULT (1UL << 2)
 
 /* for KVM_IRQ_LINE */
 struct kvm_irq_level {
@@ -2242,5 +2243,6 @@ struct kvm_s390_zpci_op {
 
 /* Exit reasons for KVM_EXIT_MEMORY_FAULT */
 #define KVM_MEMFAULT_REASON_UNKNOWN (1 << 0)
+#define KVM_MEMFAULT_REASON_ABSENT_MAPPING (1 << 1)
 
 #endif /* __LINUX_KVM_H */
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 00aec43860ff1..aa3b59410a356 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1525,6 +1525,9 @@ static int check_memory_region_flags(const struct kvm_userspace_memory_region *m
 	valid_flags |= KVM_MEM_READONLY;
 #endif
 
+	if (kvm_vm_ioctl_check_extension(NULL, KVM_CAP_MEMORY_FAULT_NOWAIT))
+		valid_flags |= KVM_MEM_ABSENT_MAPPING_FAULT;
+
 	if (mem->flags & ~valid_flags)
 		return -EINVAL;
 
@@ -6196,7 +6199,10 @@ inline int kvm_memfault_exit_or_efault(
 
 bool kvm_memfault_exit_flags_valid(uint64_t reasons)
 {
-	uint64_t valid_flags = KVM_MEMFAULT_REASON_UNKNOWN;
+	uint64_t valid_flags
+		= KVM_MEMFAULT_REASON_UNKNOWN
+		| KVM_MEMFAULT_REASON_ABSENT_MAPPING;
 
-	return !(reasons & !valid_flags);
+	/* Bitwise (not logical) NOT: reject any bit outside valid_flags. */
+	return !(reasons & ~valid_flags);
 }
-- 
2.40.0.rc1.284.g88254d51c5-goog




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux