From: Brijesh Singh <brijesh.singh@xxxxxxx>

When running an SEV-SNP VM, the sPA used to index the RMP entry is
obtained through the NPT translation (gva->gpa->spa). The NPT page
level is checked against the page level programmed in the RMP entry.
If the page levels do not match, then it will cause a nested page
fault with the RMP bit set to indicate the RMP violation.

Co-developed-by: Michael Roth <michael.roth@xxxxxxx>
Signed-off-by: Michael Roth <michael.roth@xxxxxxx>
Signed-off-by: Brijesh Singh <brijesh.singh@xxxxxxx>
Signed-off-by: Jarkko Sakkinen <jarkko@xxxxxxxxxxx>
Signed-off-by: Ashish Kalra <Ashish.Kalra@xxxxxxx>
---
 arch/x86/include/asm/kvm-x86-ops.h |  1 +
 arch/x86/include/asm/kvm_host.h    |  2 ++
 arch/x86/kvm/mmu/mmu.c             |  9 ++++++
 arch/x86/kvm/svm/sev.c             | 51 ++++++++++++++++++++++++++++++
 arch/x86/kvm/svm/svm.c             |  2 ++
 arch/x86/kvm/svm/svm.h             |  1 +
 6 files changed, 66 insertions(+)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index e116405cbb5f..87a087ec3277 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -135,6 +135,7 @@ KVM_X86_OP_OPTIONAL(alloc_apic_backing_page)
 KVM_X86_OP_OPTIONAL_RET0(fault_is_private);
 KVM_X86_OP_OPTIONAL_RET0(update_mem_attr)
 KVM_X86_OP_OPTIONAL(invalidate_restricted_mem)
+KVM_X86_OP_OPTIONAL(adjust_mapping_level)
 
 #undef KVM_X86_OP
 #undef KVM_X86_OP_OPTIONAL
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a9363a6f779d..456b42cb167b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1731,6 +1731,8 @@ struct kvm_x86_ops {
 	unsigned long (*vcpu_get_apicv_inhibit_reasons)(struct kvm_vcpu *vcpu);
 
 	void *(*alloc_apic_backing_page)(struct kvm_vcpu *vcpu);
+
+	void (*adjust_mapping_level)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int *level);
 };
 
 struct kvm_x86_nested_ops {
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 360af0c9997e..d8e5254f314d 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3081,6 +3081,7 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn,
 
 out:
 	local_irq_restore(flags);
+
 	return level;
 }
 
@@ -3141,6 +3142,14 @@ void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
 	fault->req_level = __kvm_mmu_max_mapping_level(vcpu->kvm, slot,
 						       fault->gfn, fault->max_level,
 						       fault->is_private);
+	if (kvm_slot_can_be_private(slot)) {
+		int req_level = fault->req_level;
+
+		static_call_cond(kvm_x86_adjust_mapping_level)(vcpu->kvm, fault->gfn, fault->pfn,
+							       &req_level);
+		fault->req_level = req_level;
+	}
+
 	if (fault->req_level == PG_LEVEL_4K || fault->huge_page_disallowed)
 		return;
 
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 515e22d0dc30..e8740c35be39 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -3749,3 +3749,54 @@ struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu)
 
 	return p;
 }
+
+static bool is_gfn_range_shared(struct kvm *kvm, gfn_t start, gfn_t end)
+{
+	while (start < end)
+		if (kvm_mem_is_private(kvm, start++))
+			return false;
+
+	return true;
+}
+
+void sev_adjust_mapping_level(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int *level)
+{
+	int assigned;
+	int rmp_level = PG_LEVEL_4K;
+	int level_orig = *level;
+
+	if (!sev_snp_guest(kvm))
+		return;
+
+	/* If there's an error retrieving the RMP entry, stick with 4K mappings */
+	assigned = snp_lookup_rmpentry(pfn, &rmp_level);
+	if (unlikely(assigned < 0))
+		goto out_adjust;
+
+	if (!assigned) {
+		gfn_t huge_gfn;
+
+		/*
+		 * If all the pages are shared then there is no need to keep
+		 * the RMP and NPT in sync.
+		 */
+		huge_gfn = gfn & ~(PTRS_PER_PMD - 1);
+		if (is_gfn_range_shared(kvm, huge_gfn, huge_gfn + PTRS_PER_PMD))
+			goto out;
+	}
+
+	/*
+	 * The hardware installs 2MB TLB entries to access 1GB pages,
+	 * therefore allow the NPT to use 1GB pages when the pfn was added
+	 * as 2MB in the RMP table.
+	 */
+	if (rmp_level == PG_LEVEL_2M && *level == PG_LEVEL_1G)
+		goto out;
+
+out_adjust:
+	/* Adjust the level to keep the NPT and RMP in sync */
+	*level = min_t(int, *level, rmp_level);
+out:
+	pr_debug("%s: GFN: 0x%llx, PFN: 0x%llx, level: %d, rmp_level: %d, level_orig: %d, assigned: %d\n",
+		 __func__, gfn, pfn, *level, rmp_level, level_orig, assigned);
+}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 1061aaf66f0a..9eb750c8b04c 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4835,6 +4835,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
 	.vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
 	.vcpu_get_apicv_inhibit_reasons = avic_vcpu_get_apicv_inhibit_reasons,
 	.alloc_apic_backing_page = svm_alloc_apic_backing_page,
+
+	.adjust_mapping_level = sev_adjust_mapping_level,
 };
 
 /*
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 740969b57425..cbd4594f1cca 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -706,6 +706,7 @@ void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
 void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa);
 void sev_es_unmap_ghcb(struct vcpu_svm *svm);
 struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu);
+void sev_adjust_mapping_level(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int *level);
 
 /* vmenter.S */
-- 
2.25.1
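
For reference, the clamping rule that sev_adjust_mapping_level() enforces
can be summarized in a few lines of standalone C. This is a minimal sketch,
not kernel code: clamp_npt_level() is a hypothetical helper, and the
PG_LEVEL_* numbering mirrors the kernel's enum pg_level (4K=1, 2M=2, 1G=3).

#include <stdio.h>

enum { PG_LEVEL_4K = 1, PG_LEVEL_2M = 2, PG_LEVEL_1G = 3 };

static int clamp_npt_level(int npt_level, int rmp_level)
{
	/*
	 * A 2MB RMP entry still permits a 1GB NPT mapping, since the
	 * hardware installs 2MB TLB entries for that combination.
	 */
	if (rmp_level == PG_LEVEL_2M && npt_level == PG_LEVEL_1G)
		return npt_level;

	/* Otherwise keep the NPT level at or below the RMP level. */
	return npt_level < rmp_level ? npt_level : rmp_level;
}

int main(void)
{
	printf("%d\n", clamp_npt_level(PG_LEVEL_1G, PG_LEVEL_2M)); /* 3: kept at 1G */
	printf("%d\n", clamp_npt_level(PG_LEVEL_2M, PG_LEVEL_4K)); /* 1: demoted to 4K */
	return 0;
}

The 2MB-RMP/1GB-NPT combination is the only one deliberately left out of
sync; every other mismatch is clamped to the smaller of the two levels,
which is what the min_t() at the out_adjust label does.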