If KVM can access guest_memfd memory (or at least convert it into a state in which KVM can access it) without causing a host-kernel panic (e.g. currently only if the vm type is KVM_X86_SW_PROTECTED_VM), allow `kvm_{read,write}_guest` to access gfns that are backed by gmem. If KVM cannot access guest_memfd memory (say, because it is running a TDX VM), prepare a KVM_EXIT_MEMORY_FAULT (if possible) and return -EFAULT. KVM can only prepare the memory fault exit inside the `kvm_vcpu_{read,write}_guest` variant, as it needs a vcpu reference to assign the exit reason to. KVM accesses to gmem are done via the direct map (as no userspace mappings exist, and even if they existed, they wouldn't be reflected into the memslots). If `KVM_GMEM_NO_DIRECT_MAP` is set, then temporarily reinsert the accessed folio into the direct map. Hold the folio lock for the entire duration of the access to prevent concurrent direct map modifications from taking place (as these might remove the direct map entry while kvm_{read,write}_guest is using it, which would result in a panic). Signed-off-by: Patrick Roy <roypat@xxxxxxxxxxxx> --- virt/kvm/kvm_main.c | 83 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d0788d0a72cc0..13347fb03d4a9 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3286,11 +3286,51 @@ static int __kvm_read_guest_page(struct kvm_memory_slot *slot, gfn_t gfn, return 0; } +static int __kvm_read_guest_private_page(struct kvm *kvm, + struct kvm_memory_slot *memslot, gfn_t gfn, + void *data, int offset, int len) +{ + kvm_pfn_t pfn; + int r; + struct folio *folio; + + r = kvm_gmem_get_pfn(kvm, memslot, gfn, &pfn, NULL, + KVM_GMEM_GET_PFN_SHARED | KVM_GMEM_GET_PFN_LOCKED); + + if (r < 0) + return r; + + folio = pfn_folio(pfn); + memcpy(data, folio_address(folio) + offset, len); + r = kvm_gmem_put_shared_pfn(pfn); + folio_unlock(folio); + folio_put(folio); + return r; +} + +static int __kvm_vcpu_read_guest_private_page(struct kvm_vcpu *vcpu, + struct kvm_memory_slot *memslot, gfn_t gfn, + void *data, int offset, int len) +{ + int r = __kvm_read_guest_private_page(vcpu->kvm, memslot, gfn, data, offset, len); + + /* kvm not allowed to access gmem */ + if (r == -EPERM) { + kvm_prepare_memory_fault_exit(vcpu, gfn + offset, len, false, + false, true); + return -EFAULT; + } + + return r; +} + int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, int len) { struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); + if (kvm_mem_is_private(kvm, gfn)) + return __kvm_read_guest_private_page(kvm, slot, gfn, data, offset, len); return __kvm_read_guest_page(slot, gfn, data, offset, len); } EXPORT_SYMBOL_GPL(kvm_read_guest_page); @@ -3300,6 +3340,8 @@ int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, { struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); + if (kvm_mem_is_private(vcpu->kvm, gfn)) + return __kvm_vcpu_read_guest_private_page(vcpu, slot, gfn, data, offset, len); return __kvm_read_guest_page(slot, gfn, data, offset, len); } EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_page); @@ -3390,11 +3432,50 @@ static int __kvm_write_guest_page(struct kvm *kvm, return 0; } +static int __kvm_write_guest_private_page(struct kvm *kvm, + struct kvm_memory_slot *memslot, gfn_t gfn, + const void *data, int offset, int len) +{ + kvm_pfn_t pfn; + int r; + struct folio *folio; + + r = kvm_gmem_get_pfn(kvm, memslot, gfn, &pfn, NULL, + KVM_GMEM_GET_PFN_SHARED | KVM_GMEM_GET_PFN_LOCKED); + + if (r < 0) + return r; + + folio = pfn_folio(pfn); + memcpy(folio_address(folio) + offset, data, len); + r = kvm_gmem_put_shared_pfn(pfn); + folio_unlock(folio); + folio_put(folio); + return r; +} + +static int __kvm_vcpu_write_guest_private_page(struct kvm_vcpu *vcpu, + struct kvm_memory_slot *memslot, gfn_t gfn, + const void *data, int offset, int len) +{ + int r = __kvm_write_guest_private_page(vcpu->kvm, memslot, gfn, data, offset, len); + + if (r == -EPERM) { + kvm_prepare_memory_fault_exit(vcpu, gfn + offset, len, true, + false, true); + return -EFAULT; + } + + return r; +} + int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data, int offset, int len) { struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); + if (kvm_mem_is_private(kvm, gfn)) + return __kvm_write_guest_private_page(kvm, slot, gfn, data, offset, len); return __kvm_write_guest_page(kvm, slot, gfn, data, offset, len); } EXPORT_SYMBOL_GPL(kvm_write_guest_page); @@ -3404,6 +3485,8 @@ int kvm_vcpu_write_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, { struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); + if (kvm_mem_is_private(vcpu->kvm, gfn)) + return __kvm_vcpu_write_guest_private_page(vcpu, slot, gfn, data, offset, len); return __kvm_write_guest_page(vcpu->kvm, slot, gfn, data, offset, len); } EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_page); -- 2.46.0