Fuad Tabba <tabba@xxxxxxxxxx> writes:

> Add support for mmap() and fault() for guest_memfd backed memory
> in the host for VMs that support in-place conversion between
> shared and private. To that end, this patch adds the ability to
> check whether the VM type supports in-place conversion, and only
> allows mapping its memory if that's the case.
>
> Also add the KVM capability KVM_CAP_GMEM_SHARED_MEM, which
> indicates that the VM supports shared memory in guest_memfd, or
> that the host can create VMs that support shared memory.
> Supporting shared memory implies that memory can be mapped when
> shared with the host.
>
> This is controlled by the KVM_GMEM_SHARED_MEM configuration
> option.
>
> Signed-off-by: Fuad Tabba <tabba@xxxxxxxxxx>
> ---
>  include/linux/kvm_host.h |  11 +++++
>  include/uapi/linux/kvm.h |   1 +
>  virt/kvm/guest_memfd.c   | 102 +++++++++++++++++++++++++++++++++++++++
>  virt/kvm/kvm_main.c      |   4 ++
>  4 files changed, 118 insertions(+)
>
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 3ad0719bfc4f..601bbcaa5e41 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -728,6 +728,17 @@ static inline bool kvm_arch_has_private_mem(struct kvm *kvm)
>  }
>  #endif
>
> +/*
> + * Arch code must define kvm_arch_gmem_supports_shared_mem if support for
> + * private memory is enabled and it supports in-place shared/private conversion.
> + */
> +#if !defined(kvm_arch_gmem_supports_shared_mem) && !IS_ENABLED(CONFIG_KVM_GMEM_SHARED_MEM)
> +static inline bool kvm_arch_gmem_supports_shared_mem(struct kvm *kvm)
> +{
> +	return false;
> +}
> +#endif
> +
>  #ifndef kvm_arch_has_readonly_mem
>  static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm)
>  {
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 45e6d8fca9b9..117937a895da 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -929,6 +929,7 @@ struct kvm_enable_cap {
>  #define KVM_CAP_PRE_FAULT_MEMORY 236
>  #define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237
>  #define KVM_CAP_X86_GUEST_MODE 238
> +#define KVM_CAP_GMEM_SHARED_MEM 239
>
>  struct kvm_irq_routing_irqchip {
>  	__u32 irqchip;
> diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
> index 5fc414becae5..eea44e003ed1 100644
> --- a/virt/kvm/guest_memfd.c
> +++ b/virt/kvm/guest_memfd.c
> @@ -320,7 +320,109 @@ static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn)
>  	return gfn - slot->base_gfn + slot->gmem.pgoff;
>  }
>
> +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
> +static bool folio_offset_is_shared(const struct folio *folio, struct file *file, pgoff_t index)
> +{
> +	struct kvm_gmem *gmem = file->private_data;
> +
> +	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);

I should've commented on this in the last series, but why must the
folio lock be held to check whether this offset is shared?

I was thinking of using the filemap's lock (filemap_invalidate_lock())
to guard mappability races. Does that work too?
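Something like the below is what I had in mind (a rough, untested
sketch; the rename and the rwsem_assert_held() on the mapping's
invalidate_lock are just illustrative):

/*
 * Untested sketch: if the invalidate_lock is what serializes
 * shared/private conversion, then holding it (shared or exclusive)
 * should be enough to stabilize the result of this check, and the
 * folio parameter could go away entirely.
 */
static bool gmem_offset_is_shared(struct file *file, pgoff_t index)
{
	struct kvm_gmem *gmem = file->private_data;

	rwsem_assert_held(&file_inode(file)->i_mapping->invalidate_lock);

	/* For now, VMs that support shared memory share all their memory. */
	return kvm_arch_gmem_supports_shared_mem(gmem->kvm);
}

kvm_gmem_fault() below already takes filemap_invalidate_lock_shared()
before the folio lookup, so the assertion would hold in the fault path
without any extra locking.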
> +
> +	/* For now, VMs that support shared memory share all their memory. */
> +	return kvm_arch_gmem_supports_shared_mem(gmem->kvm);
> +}
> +
> +static vm_fault_t kvm_gmem_fault(struct vm_fault *vmf)
> +{
> +	struct inode *inode = file_inode(vmf->vma->vm_file);
> +	struct folio *folio;
> +	vm_fault_t ret = VM_FAULT_LOCKED;
> +
> +	filemap_invalidate_lock_shared(inode->i_mapping);
> +
> +	folio = kvm_gmem_get_folio(inode, vmf->pgoff);
> +	if (IS_ERR(folio)) {
> +		int err = PTR_ERR(folio);
> +
> +		if (err == -EAGAIN)
> +			ret = VM_FAULT_RETRY;
> +		else
> +			ret = vmf_error(err);
> +
> +		goto out_filemap;
> +	}
> +
> +	if (folio_test_hwpoison(folio)) {
> +		ret = VM_FAULT_HWPOISON;
> +		goto out_folio;
> +	}
> +
> +	if (!folio_offset_is_shared(folio, vmf->vma->vm_file, vmf->pgoff)) {
> +		ret = VM_FAULT_SIGBUS;
> +		goto out_folio;
> +	}
> +
> +	/*
> +	 * Shared folios would not be marked as "guestmem" so far, and we only
> +	 * expect shared folios at this point.
> +	 */
> +	if (WARN_ON_ONCE(folio_test_guestmem(folio))) {
> +		ret = VM_FAULT_SIGBUS;
> +		goto out_folio;
> +	}
> +
> +	/* No support for huge pages. */
> +	if (WARN_ON_ONCE(folio_test_large(folio))) {
> +		ret = VM_FAULT_SIGBUS;
> +		goto out_folio;
> +	}
> +
> +	if (!folio_test_uptodate(folio)) {
> +		clear_highpage(folio_page(folio, 0));
> +		kvm_gmem_mark_prepared(folio);
> +	}
> +
> +	vmf->page = folio_file_page(folio, vmf->pgoff);
> +
> +out_folio:
> +	if (ret != VM_FAULT_LOCKED) {
> +		folio_unlock(folio);
> +		folio_put(folio);
> +	}
> +
> +out_filemap:
> +	filemap_invalidate_unlock_shared(inode->i_mapping);
> +
> +	return ret;
> +}
> +
> <snip>
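Unrelated to the locking question, just to confirm my understanding of
the new uAPI: the userspace flow this enables would be roughly the
below (untested sketch, most error handling elided; assumes vm_fd is
for a VM type where kvm_arch_gmem_supports_shared_mem() returns true):

#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

static void *map_shared_gmem(int vm_fd, size_t size)
{
	struct kvm_create_guest_memfd args;
	int gmem_fd;
	void *mem;

	/* New capability added by this patch. */
	if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_GMEM_SHARED_MEM) <= 0)
		return NULL;

	memset(&args, 0, sizeof(args));
	args.size = size;

	gmem_fd = ioctl(vm_fd, KVM_CREATE_GUEST_MEMFD, &args);
	if (gmem_fd < 0)
		return NULL;

	/* The mmap()/fault() path this patch adds via kvm_gmem_fault(). */
	mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, gmem_fd, 0);
	return mem == MAP_FAILED ? NULL : mem;
}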