If `KVM_GMEM_NO_DIRECT_MAP` is set, all gmem folios are removed from the direct map immediately after allocation. Add a flag to kvm_gmem_get_folio to override this behavior, and expose it via `kvm_gmem_get_pfn`. Only allow this flag to be set if KVM can actually access gmem (currently only if the VM type is KVM_X86_SW_PROTECTED_VM). KVM_GMEM_GET_PFN_SHARED defers the direct map removal for newly allocated folios until kvm_gmem_put_shared_pfn is called. For existing folios, the direct map entry is temporarily restored until kvm_gmem_put_shared_pfn is called. The folio lock must be held the entire time the folio is present in the direct map, to prevent races with concurrent calls to kvm_gmem_folio_set_private that might remove direct map entries while the folios are being accessed by KVM. As this is currently not possible (kvm_gmem_get_pfn always unlocks the folio), the next patch will introduce a KVM_GMEM_GET_PFN_LOCKED flag. Signed-off-by: Patrick Roy <roypat@xxxxxxxxxxxx> --- arch/x86/kvm/mmu/mmu.c | 2 +- include/linux/kvm_host.h | 12 +++++++++-- virt/kvm/guest_memfd.c | 46 +++++++++++++++++++++++++++++++--------- 3 files changed, 47 insertions(+), 13 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 901be9e420a4c..cb2f111f2cce0 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -4349,7 +4349,7 @@ static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu, } r = kvm_gmem_get_pfn(vcpu->kvm, fault->slot, fault->gfn, &fault->pfn, - &max_order); + &max_order, 0); if (r) { kvm_mmu_prepare_memory_fault_exit(vcpu, fault); return r; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 689e8be873a75..8a2975674de4b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2432,17 +2432,25 @@ static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) } #endif /* CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES */ +#define KVM_GMEM_GET_PFN_SHARED BIT(0) +#define KVM_GMEM_GET_PFN_PREPARE BIT(31) /* 
internal */ + #ifdef CONFIG_KVM_PRIVATE_MEM int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, - gfn_t gfn, kvm_pfn_t *pfn, int *max_order); + gfn_t gfn, kvm_pfn_t *pfn, int *max_order, unsigned long flags); +int kvm_gmem_put_shared_pfn(kvm_pfn_t pfn); #else static inline int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn, - kvm_pfn_t *pfn, int *max_order) + kvm_pfn_t *pfn, int *max_order, int flags) { KVM_BUG_ON(1, kvm); return -EIO; } +static inline int kvm_gmem_put_shared_pfn(kvm_pfn_t pfn) +{ + return -EIO; +} #endif /* CONFIG_KVM_PRIVATE_MEM */ #ifdef CONFIG_HAVE_KVM_GMEM_PREPARE diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c index 2ed27992206f3..492b04f4e5c18 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c @@ -55,6 +55,11 @@ static bool kvm_gmem_test_no_direct_map(struct inode *inode) return ((unsigned long)inode->i_private & KVM_GMEM_NO_DIRECT_MAP) == KVM_GMEM_NO_DIRECT_MAP; } +static bool kvm_gmem_test_accessible(struct kvm *kvm) +{ + return kvm->arch.vm_type == KVM_X86_SW_PROTECTED_VM; +} + static int kvm_gmem_folio_set_private(struct folio *folio) { unsigned long start, npages, i; @@ -110,10 +115,11 @@ static int kvm_gmem_folio_clear_private(struct folio *folio) return r; } -static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index, bool prepare) +static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index, unsigned long flags) { int r; struct folio *folio; + bool share = flags & KVM_GMEM_GET_PFN_SHARED; /* TODO: Support huge pages. 
*/ folio = filemap_grab_folio(inode->i_mapping, index); @@ -139,7 +145,7 @@ static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index, bool folio_mark_uptodate(folio); } - if (prepare) { + if (flags & KVM_GMEM_GET_PFN_PREPARE) { r = kvm_gmem_prepare_folio(inode, index, folio); if (r < 0) goto out_err; @@ -148,12 +154,15 @@ static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index, bool if (!kvm_gmem_test_no_direct_map(inode)) goto out; - if (!folio_test_private(folio)) { + if (folio_test_private(folio) && share) { + r = kvm_gmem_folio_clear_private(folio); + } else if (!folio_test_private(folio) && !share) { r = kvm_gmem_folio_set_private(folio); - if (r) - goto out_err; } + if (r) + goto out_err; + out: /* * Ignore accessed, referenced, and dirty flags. The memory is @@ -264,7 +273,7 @@ static long kvm_gmem_allocate(struct inode *inode, loff_t offset, loff_t len) break; } - folio = kvm_gmem_get_folio(inode, index, true); + folio = kvm_gmem_get_folio(inode, index, KVM_GMEM_GET_PFN_PREPARE); if (IS_ERR(folio)) { r = PTR_ERR(folio); break; @@ -624,7 +633,7 @@ void kvm_gmem_unbind(struct kvm_memory_slot *slot) } static int __kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot, - gfn_t gfn, kvm_pfn_t *pfn, int *max_order, bool prepare) + gfn_t gfn, kvm_pfn_t *pfn, int *max_order, unsigned long flags) { pgoff_t index = gfn - slot->base_gfn + slot->gmem.pgoff; struct kvm_gmem *gmem = file->private_data; @@ -643,7 +652,7 @@ static int __kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot, return -EIO; } - folio = kvm_gmem_get_folio(file_inode(file), index, prepare); + folio = kvm_gmem_get_folio(file_inode(file), index, flags); if (IS_ERR(folio)) return PTR_ERR(folio); @@ -667,20 +676,37 @@ static int __kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot, } int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, - gfn_t gfn, kvm_pfn_t *pfn, int *max_order) + gfn_t gfn, kvm_pfn_t *pfn, int 
*max_order, unsigned long flags) { struct file *file = kvm_gmem_get_file(slot); int r; + int valid_flags = KVM_GMEM_GET_PFN_SHARED; + + if ((flags & valid_flags) != flags) + return -EINVAL; + + if ((flags & KVM_GMEM_GET_PFN_SHARED) && !kvm_gmem_test_accessible(kvm)) + return -EPERM; if (!file) return -EFAULT; - r = __kvm_gmem_get_pfn(file, slot, gfn, pfn, max_order, true); + r = __kvm_gmem_get_pfn(file, slot, gfn, pfn, max_order, flags | KVM_GMEM_GET_PFN_PREPARE); fput(file); return r; } EXPORT_SYMBOL_GPL(kvm_gmem_get_pfn); +int kvm_gmem_put_shared_pfn(kvm_pfn_t pfn) { + struct folio *folio = pfn_folio(pfn); + + if (!kvm_gmem_test_no_direct_map(folio_inode(folio))) + return 0; + + return kvm_gmem_folio_set_private(folio); +} +EXPORT_SYMBOL_GPL(kvm_gmem_put_shared_pfn); + long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long npages, kvm_gmem_populate_cb post_populate, void *opaque) { -- 2.46.0