Use one of the 14 reserved u64s in struct kvm_userspace_memory_region2 for the user to provide `userfault_bitmap`. The new KVM_MEM_USERFAULT memslot flag indicates whether KVM should read the `userfault_bitmap` field of the memslot. The user is permitted to provide a bogus pointer; if the pointer cannot be read from, KVM returns -EFAULT (with no other information) back to the user. Signed-off-by: James Houghton <jthoughton@xxxxxxxxxx> --- include/linux/kvm_host.h | 14 ++++++++++++++ include/uapi/linux/kvm.h | 4 +++- virt/kvm/Kconfig | 3 +++ virt/kvm/kvm_main.c | 35 +++++++++++++++++++++++++++++++++++ 4 files changed, 55 insertions(+), 1 deletion(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 401439bb21e3..f7a3dfd5e224 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -590,6 +590,7 @@ struct kvm_memory_slot { unsigned long *dirty_bitmap; struct kvm_arch_memory_slot arch; unsigned long userspace_addr; + unsigned long __user *userfault_bitmap; u32 flags; short id; u16 as_id; @@ -724,6 +725,11 @@ static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm) } #endif +static inline bool kvm_has_userfault(struct kvm *kvm) +{ + return IS_ENABLED(CONFIG_HAVE_KVM_USERFAULT); +} + struct kvm_memslots { u64 generation; atomic_long_t last_used_slot; @@ -2553,4 +2559,12 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu, struct kvm_pre_fault_memory *range); #endif +int kvm_gfn_userfault(struct kvm *kvm, struct kvm_memory_slot *memslot, + gfn_t gfn); + +static inline bool kvm_memslot_userfault(struct kvm_memory_slot *memslot) +{ + return memslot->flags & KVM_MEM_USERFAULT; +} + #endif diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 343de0a51797..7ade5169d373 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -40,7 +40,8 @@ struct kvm_userspace_memory_region2 { __u64 guest_memfd_offset; __u32 guest_memfd; __u32 pad1; - __u64 pad2[14]; + __u64 userfault_bitmap; + __u64 pad2[13]; }; /* @@ 
-51,6 +52,7 @@ struct kvm_userspace_memory_region2 { #define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0) #define KVM_MEM_READONLY (1UL << 1) #define KVM_MEM_GUEST_MEMFD (1UL << 2) +#define KVM_MEM_USERFAULT (1UL << 3) /* for KVM_IRQ_LINE */ struct kvm_irq_level { diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 54e959e7d68f..9eb1fae238b1 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -124,3 +124,6 @@ config HAVE_KVM_ARCH_GMEM_PREPARE config HAVE_KVM_ARCH_GMEM_INVALIDATE bool depends on KVM_PRIVATE_MEM + +config HAVE_KVM_USERFAULT + bool diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index de2c11dae231..4bceae6a6401 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1541,6 +1541,9 @@ static int check_memory_region_flags(struct kvm *kvm, !(mem->flags & KVM_MEM_GUEST_MEMFD)) valid_flags |= KVM_MEM_READONLY; + if (kvm_has_userfault(kvm)) + valid_flags |= KVM_MEM_USERFAULT; + if (mem->flags & ~valid_flags) return -EINVAL; @@ -1974,6 +1977,12 @@ int __kvm_set_memory_region(struct kvm *kvm, return -EINVAL; if ((mem->memory_size >> PAGE_SHIFT) > KVM_MEM_MAX_NR_PAGES) return -EINVAL; + if (mem->flags & KVM_MEM_USERFAULT && + ((mem->userfault_bitmap != untagged_addr(mem->userfault_bitmap)) || + !access_ok((void __user *)(unsigned long)mem->userfault_bitmap, + DIV_ROUND_UP(mem->memory_size >> PAGE_SHIFT, BITS_PER_LONG) + * sizeof(long)))) + return -EINVAL; slots = __kvm_memslots(kvm, as_id); @@ -2042,6 +2051,9 @@ int __kvm_set_memory_region(struct kvm *kvm, if (r) goto out; } + if (mem->flags & KVM_MEM_USERFAULT) + new->userfault_bitmap = + (unsigned long __user *)(unsigned long)mem->userfault_bitmap; r = kvm_set_memslot(kvm, old, new, change); if (r) @@ -6426,3 +6438,26 @@ void kvm_exit(void) kvm_irqfd_exit(); } EXPORT_SYMBOL_GPL(kvm_exit); + +int kvm_gfn_userfault(struct kvm *kvm, struct kvm_memory_slot *memslot, + gfn_t gfn) +{ + unsigned long bitmap_chunk = 0; + off_t offset; + + if (!kvm_memslot_userfault(memslot)) + return 0; + + if 
(WARN_ON_ONCE(!memslot->userfault_bitmap)) + return 0; + + offset = gfn - memslot->base_gfn; + + if (copy_from_user(&bitmap_chunk, + memslot->userfault_bitmap + offset / BITS_PER_LONG, + sizeof(bitmap_chunk))) + return -EFAULT; + + /* Set in the bitmap means that the gfn is userfault */ + return !!(bitmap_chunk & (1ul << (offset % BITS_PER_LONG))); +} -- 2.47.1.613.gc27f4b7a9f-goog