Add support for mmap() and fault() for guest_memfd in the host. The ability to fault in a guest page is contingent on that page being shared with the host. The guest_memfd PRIVATE memory attribute is not used for two reasons. First because it reflects the userspace expectation for that memory location, and therefore can be toggled by userspace. The second is, although each guest_memfd file has a 1:1 binding with a KVM instance, the plan is to allow multiple files per inode, e.g. to allow intra-host migration to a new KVM instance, without destroying guest_memfd. The mapping is restricted to only memory explicitly shared with the host. KVM checks that the host doesn't have any mappings for private memory via the folio's refcount. To avoid races between paths that check mappability and paths that check whether the host has any mappings (via the refcount), the folio lock is held in while either check is being performed. This new feature is gated with a new configuration option, CONFIG_KVM_GMEM_MAPPABLE. Co-developed-by: Ackerley Tng <ackerleytng@xxxxxxxxxx> Signed-off-by: Ackerley Tng <ackerleytng@xxxxxxxxxx> Co-developed-by: Elliot Berman <quic_eberman@xxxxxxxxxxx> Signed-off-by: Elliot Berman <quic_eberman@xxxxxxxxxxx> Signed-off-by: Fuad Tabba <tabba@xxxxxxxxxx> --- The functions kvm_gmem_is_mapped(), kvm_gmem_set_mappable(), and int kvm_gmem_clear_mappable() are not used in this patch series. They are intended to be used in future patches [*], which check and toggle mapability when the guest shares/unshares pages with the host. [*] https://android-kvm.googlesource.com/linux/+/refs/heads/tabba/guestmem-6.13-v5-pkvm --- virt/kvm/Kconfig | 4 ++ virt/kvm/guest_memfd.c | 87 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 54e959e7d68f..59400fd8f539 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -124,3 +124,7 @@ config HAVE_KVM_ARCH_GMEM_PREPARE config HAVE_KVM_ARCH_GMEM_INVALIDATE bool depends on KVM_PRIVATE_MEM + +config KVM_GMEM_MAPPABLE + select KVM_PRIVATE_MEM + bool diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c index 722afd9f8742..159ffa17f562 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c @@ -671,9 +671,88 @@ bool kvm_slot_gmem_is_guest_mappable(struct kvm_memory_slot *slot, gfn_t gfn) return gmem_is_guest_mappable(inode, pgoff); } + +static vm_fault_t kvm_gmem_fault(struct vm_fault *vmf) +{ + struct inode *inode = file_inode(vmf->vma->vm_file); + struct folio *folio; + vm_fault_t ret = VM_FAULT_LOCKED; + + filemap_invalidate_lock_shared(inode->i_mapping); + + folio = kvm_gmem_get_folio(inode, vmf->pgoff); + if (IS_ERR(folio)) { + ret = VM_FAULT_SIGBUS; + goto out_filemap; + } + + if (folio_test_hwpoison(folio)) { + ret = VM_FAULT_HWPOISON; + goto out_folio; + } + + if (!gmem_is_mappable(inode, vmf->pgoff)) { + ret = VM_FAULT_SIGBUS; + goto out_folio; + } + + if (WARN_ON_ONCE(folio_test_guestmem(folio))) { + ret = VM_FAULT_SIGBUS; + goto out_folio; + } + + if (!folio_test_uptodate(folio)) { + unsigned long nr_pages = folio_nr_pages(folio); + unsigned long i; + + for (i = 0; i < nr_pages; i++) + clear_highpage(folio_page(folio, i)); + + folio_mark_uptodate(folio); + } + + vmf->page = folio_file_page(folio, vmf->pgoff); + +out_folio: + if (ret != VM_FAULT_LOCKED) { + folio_unlock(folio); + folio_put(folio); + } + +out_filemap: + filemap_invalidate_unlock_shared(inode->i_mapping); + + return ret; +} + +static const struct vm_operations_struct kvm_gmem_vm_ops = { + .fault = kvm_gmem_fault, +}; + +static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma) +{ + if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) != + (VM_SHARED | VM_MAYSHARE)) { + return -EINVAL; + } + + file_accessed(file); + vm_flags_set(vma, VM_DONTDUMP); + vma->vm_ops = &kvm_gmem_vm_ops; + + return 0; +} +#else +static int gmem_set_mappable(struct inode *inode, pgoff_t start, pgoff_t end) +{ + WARN_ON_ONCE(1); + return -EINVAL; +} +#define kvm_gmem_mmap NULL #endif /* CONFIG_KVM_GMEM_MAPPABLE */ static struct file_operations kvm_gmem_fops = { + .mmap = kvm_gmem_mmap, .open = generic_file_open, .release = kvm_gmem_release, .fallocate = kvm_gmem_fallocate, @@ -860,6 +939,14 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags) goto err_gmem; } + if (IS_ENABLED(CONFIG_KVM_GMEM_MAPPABLE)) { + err = gmem_set_mappable(file_inode(file), 0, size >> PAGE_SHIFT); + if (err) { + fput(file); + goto err_gmem; + } + } + kvm_get_kvm(kvm); gmem->kvm = kvm; xa_init(&gmem->bindings); -- 2.48.0.rc2.279.g1de40edade-goog