This patch pairs an fd-based memslot with a memory backing store. The two sides handshake to exchange callbacks that will be called later. KVM->memfd: - get_pfn: get or allocate (when alloc is true) the page at the specified offset in the fd; the page will be locked - put_pfn: put and unlock the pfn memfd->KVM: - invalidate_page_range: called when userspace punches a hole in the fd; KVM should unmap the related pages in the second MMU - fallocate: called when userspace fallocates space on the fd; KVM can map the related pages in the second MMU Currently, tmpfs behind the memfd interface is supported. Signed-off-by: Yu Zhang <yu.c.zhang@xxxxxxxxxxxxxxx> Signed-off-by: Chao Peng <chao.p.peng@xxxxxxxxxxxxxxx> --- arch/x86/kvm/Makefile | 3 +- include/linux/kvm_host.h | 6 +++ virt/kvm/memfd.c | 101 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 109 insertions(+), 1 deletion(-) create mode 100644 virt/kvm/memfd.c diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index f919df73e5e3..5d7f289b1ca0 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -11,7 +11,8 @@ KVM := ../../../virt/kvm kvm-y += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \ $(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o \ - $(KVM)/dirty_ring.o $(KVM)/binary_stats.o + $(KVM)/dirty_ring.o $(KVM)/binary_stats.o \ + $(KVM)/memfd.o kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1d4ac0c9b63b..e8646103356b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -769,6 +769,12 @@ static inline void kvm_irqfd_exit(void) { } #endif + +int kvm_memfd_register(struct kvm *kvm, + const struct kvm_userspace_memory_region_ext *mem, + struct kvm_memory_slot *slot); +void kvm_memfd_unregister(struct kvm *kvm, struct kvm_memory_slot *slot); + int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, struct module *module); void kvm_exit(void); diff --git a/virt/kvm/memfd.c 
b/virt/kvm/memfd.c
new file mode 100644
index 000000000000..bd930dcb455f
--- /dev/null
+++ b/virt/kvm/memfd.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * memfd.c: routines for fd based guest memory backing store
+ * Copyright (c) 2021, Intel Corporation.
+ *
+ * Author:
+ *	Chao Peng <chao.p.peng@xxxxxxxxxxxxxxx>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/file.h>
+#include <linux/memfd.h>
+
+/*
+ * Backing-store callbacks filled in by memfd_register_guest().
+ *
+ * NOTE(review): every registration writes this one file-scope pointer, so
+ * the code presumably relies on all backing stores handing back the same
+ * ops table - confirm.
+ */
+static const struct guest_mem_ops *memfd_ops;
+
+/*
+ * memfd->KVM notification, wired up as .invalidate_page_range.
+ *
+ * Placeholder: nothing is unmapped yet.
+ * FIXME: we can get here after the owner no longer exists.
+ */
+static void memfd_invalidate_page_range(struct inode *inode, void *owner,
+					pgoff_t start, pgoff_t end)
+{
+}
+
+/*
+ * memfd->KVM notification, wired up as .fallocate.
+ *
+ * Placeholder: nothing is mapped yet.
+ * FIXME: we can get here after the owner no longer exists.
+ */
+static void memfd_fallocate(struct inode *inode, void *owner,
+			    pgoff_t start, pgoff_t end)
+{
+}
+
+/* Callbacks KVM hands to the backing store at registration time. */
+static const struct guest_ops memfd_notifier = {
+	.invalidate_page_range = memfd_invalidate_page_range,
+	.fallocate = memfd_fallocate,
+};
+
+/*
+ * Translate @gfn into a page index within @file and ask the backing store
+ * for the pfn at that index.
+ *
+ * The index is the gfn's offset into the slot plus
+ * slot->userspace_addr >> PAGE_SHIFT.
+ * NOTE(review): this treats userspace_addr as a byte offset into the fd
+ * rather than a virtual address - confirm against the memslot setup code.
+ */
+static kvm_pfn_t kvm_memfd_get_pfn(struct kvm_memory_slot *slot,
+				   struct file *file, gfn_t gfn,
+				   bool alloc, int *order)
+{
+	pgoff_t index = gfn - slot->base_gfn +
+			(slot->userspace_addr >> PAGE_SHIFT);
+
+	return memfd_ops->get_lock_pfn(file->f_inode, index, alloc, order);
+}
+
+/* Hand @pfn back to the backing store via its put_unlock_pfn callback. */
+static void kvm_memfd_put_pfn(kvm_pfn_t pfn)
+{
+	memfd_ops->put_unlock_pfn(pfn);
+}
+
+/* Ops installed in slot->memfd_ops once registration succeeds. */
+static struct kvm_memfd_ops kvm_memfd_ops = {
+	.get_pfn = kvm_memfd_get_pfn,
+	.put_pfn = kvm_memfd_put_pfn,
+};
+
+/*
+ * Take a reference on the file behind @fd, register its inode with the
+ * backing store (passing @kvm along) and return the file through @filep.
+ *
+ * The slot holds on to the file until kvm_memfd_unregister(), so a real
+ * reference is taken with fget(). fdget() may borrow the fd table's
+ * reference without bumping f_count, in which case a later fput() would
+ * drop a reference this code never owned.
+ *
+ * Returns 0 on success, -EINVAL if @fd is not an open file, or the error
+ * from memfd_register_guest(). No reference is held on failure.
+ */
+static int kvm_memfd_register_file(struct kvm *kvm, unsigned int fd,
+				   struct file **filep)
+{
+	struct file *file = fget(fd);
+	int ret;
+
+	if (!file)
+		return -EINVAL;
+
+	ret = memfd_register_guest(file->f_inode, kvm,
+				   &memfd_notifier, &memfd_ops);
+	if (ret) {
+		fput(file);
+		return ret;
+	}
+
+	*filep = file;
+	return 0;
+}
+
+/*
+ * Pair @slot with the fd(s) named in @mem.
+ *
+ * mem->fd is always registered; mem->private_fd is registered as well when
+ * it is non-negative. On success slot->file (and slot->priv_file, if used)
+ * each hold a file reference and slot->memfd_ops points at kvm_memfd_ops.
+ *
+ * Returns 0 on success or a negative errno; on failure every reference
+ * taken here has been dropped again.
+ */
+int kvm_memfd_register(struct kvm *kvm,
+		       const struct kvm_userspace_memory_region_ext *mem,
+		       struct kvm_memory_slot *slot)
+{
+	int ret;
+
+	ret = kvm_memfd_register_file(kvm, mem->fd, &slot->file);
+	if (ret)
+		return ret;
+
+	if (mem->private_fd >= 0) {
+		ret = kvm_memfd_register_file(kvm, mem->private_fd,
+					      &slot->priv_file);
+		if (ret)
+			goto err;
+	}
+
+	slot->memfd_ops = &kvm_memfd_ops;
+	return 0;
+err:
+	kvm_memfd_unregister(kvm, slot);
+	return ret;
+}
+
+/*
+ * Drop the file references held by @slot and clear its memfd state.
+ *
+ * Safe to call on a partially registered slot: NULL file pointers are
+ * skipped.
+ * NOTE(review): nothing here undoes memfd_register_guest(), so the backing
+ * store may still invoke the memfd_notifier callbacks afterwards - confirm.
+ */
+void kvm_memfd_unregister(struct kvm *kvm, struct kvm_memory_slot *slot)
+{
+	if (slot->file) {
+		fput(slot->file);
+		slot->file = NULL;
+	}
+
+	if (slot->priv_file) {
+		fput(slot->priv_file);
+		slot->priv_file = NULL;
+	}
+	slot->memfd_ops = NULL;
+}
-- 
2.17.1