From: Brijesh Singh <brijesh.singh@xxxxxxx>

When running an SEV-SNP VM, the system physical address (sPA) used to
index the RMP entry is obtained through the NPT translation
(gva->gpa->spa). The NPT page level is checked against the page level
programmed in the RMP entry; if the levels do not match, hardware
raises a nested page fault with the RMP bit set to indicate the RMP
violation.

Add an rmp_page_level_adjust() x86 op so that the NPT mapping level
can be clamped to the page level recorded in the RMP table before the
mapping is installed, keeping the NPT and RMP page levels in sync.

Co-developed-by: Michael Roth <michael.roth@xxxxxxx>
Signed-off-by: Michael Roth <michael.roth@xxxxxxx>
Signed-off-by: Brijesh Singh <brijesh.singh@xxxxxxx>
Signed-off-by: Jarkko Sakkinen <jarkko@xxxxxxxxxxx>
Signed-off-by: Ashish Kalra <Ashish.Kalra@xxxxxxx>
---
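As a reference for reviewers, a rough standalone sketch of the clamping
rule that sev_rmp_page_level_adjust() below applies (the shared-page and
error handling is omitted). The PG_LEVEL_* numbering matches the
kernel's (4K = 1, 2M = 2, 1G = 3), but the helper name and userspace
form are illustrative only and not part of this patch:

    #include <stdio.h>

    enum { PG_LEVEL_4K = 1, PG_LEVEL_2M = 2, PG_LEVEL_1G = 3 };

    /*
     * Clamp the proposed NPT mapping level to the level recorded in the
     * RMP entry. A 1G NPT mapping over 2M RMP entries is left alone
     * because the hardware installs 2MB TLB entries for 1GB pages.
     */
    static int adjust_npt_level(int npt_level, int rmp_level)
    {
            if (rmp_level == PG_LEVEL_2M && npt_level == PG_LEVEL_1G)
                    return npt_level;
            return npt_level < rmp_level ? npt_level : rmp_level;
    }

    int main(void)
    {
            /* 2M NPT mapping over a 4K RMP entry falls back to 4K */
            printf("%d\n", adjust_npt_level(PG_LEVEL_2M, PG_LEVEL_4K));
            /* 1G NPT mapping over a 2M RMP entry may stay at 1G */
            printf("%d\n", adjust_npt_level(PG_LEVEL_1G, PG_LEVEL_2M));
            return 0;
    }
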
 arch/x86/include/asm/kvm-x86-ops.h |  1 +
 arch/x86/include/asm/kvm_host.h    |  2 +
 arch/x86/kvm/mmu/mmu.c             | 12 +++++-
 arch/x86/kvm/svm/sev.c             | 66 ++++++++++++++++++++++++++++++
 arch/x86/kvm/svm/svm.c             |  2 +
 arch/x86/kvm/svm/svm.h             |  1 +
 6 files changed, 83 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index e0015926cdf4..61e31b622fce 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -136,6 +136,7 @@ KVM_X86_OP_OPTIONAL_RET0(private_mem_enabled);
 KVM_X86_OP_OPTIONAL_RET0(fault_is_private);
 KVM_X86_OP_OPTIONAL_RET0(update_mem_attr)
 KVM_X86_OP_OPTIONAL(invalidate_restricted_mem)
+KVM_X86_OP_OPTIONAL(rmp_page_level_adjust)
 
 #undef KVM_X86_OP
 #undef KVM_X86_OP_OPTIONAL
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e2529415f28b..b126c6ac7ce4 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1724,6 +1724,8 @@ struct kvm_x86_ops {
 	unsigned long (*vcpu_get_apicv_inhibit_reasons)(struct kvm_vcpu *vcpu);
 
 	void *(*alloc_apic_backing_page)(struct kvm_vcpu *vcpu);
+
+	void (*rmp_page_level_adjust)(struct kvm *kvm, gfn_t gfn, int *level);
 };
 
 struct kvm_x86_nested_ops {
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 2713632e5061..25db83021500 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3053,6 +3053,11 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn,
 
 out:
 	local_irq_restore(flags);
+
+	/* Adjust the page level based on the SEV-SNP RMP page level. */
+	if (kvm_x86_ops.rmp_page_level_adjust)
+		static_call(kvm_x86_rmp_page_level_adjust)(kvm, gfn, &level);
+
 	return level;
 }
 
@@ -3070,8 +3075,13 @@ int kvm_mmu_max_mapping_level(struct kvm *kvm,
 		break;
 	}
 
-	if (is_private)
+	pr_debug("%s: gfn: %llx max_level: %d max_huge_page_level: %d\n",
+		 __func__, gfn, max_level, max_huge_page_level);
+	if (kvm_slot_can_be_private(slot) && is_private) {
+		if (kvm_x86_ops.rmp_page_level_adjust)
+			static_call(kvm_x86_rmp_page_level_adjust)(kvm, gfn, &max_level);
 		return max_level;
+	}
 
 	if (max_level == PG_LEVEL_4K)
 		return PG_LEVEL_4K;
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 6f901545bed9..443c5c8aaaf3 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -3710,6 +3710,72 @@ struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu)
 	return p;
 }
 
+static bool is_pfn_range_shared(kvm_pfn_t start, kvm_pfn_t end)
+{
+	int level;
+
+	while (end > start) {
+		if (snp_lookup_rmpentry(start, &level) != 0)
+			return false;
+		start++;
+	}
+
+	return true;
+}
+
+void sev_rmp_page_level_adjust(struct kvm *kvm, gfn_t gfn, int *level)
+{
+	struct kvm_memory_slot *slot;
+	int ret, order, assigned;
+	int rmp_level = 1;
+	kvm_pfn_t pfn;
+
+	slot = gfn_to_memslot(kvm, gfn);
+	if (!kvm_slot_can_be_private(slot))
+		return;
+
+	ret = kvm_restricted_mem_get_pfn(slot, gfn, &pfn, &order);
+	if (ret) {
+		pr_warn_ratelimited("Failed to adjust RMP page level, unable to obtain private PFN, rc: %d\n",
+				    ret);
+		*level = PG_LEVEL_4K;
+		return;
+	}
+
+	/* If there's an error retrieving RMP entry, stick with 4K mappings */
+	assigned = snp_lookup_rmpentry(pfn, &rmp_level);
+	if (unlikely(assigned < 0))
+		goto out_adjust;
+
+	if (!assigned) {
+		kvm_pfn_t huge_pfn;
+
+		/*
+		 * If all the pages are shared then no need to keep the RMP
+		 * and NPT in sync.
+		 */
+		huge_pfn = pfn & ~(PTRS_PER_PMD - 1);
+		if (is_pfn_range_shared(huge_pfn, huge_pfn + PTRS_PER_PMD))
+			goto out;
+	}
+
+	/*
+	 * The hardware installs 2MB TLB entries to access to 1GB pages,
+	 * therefore allow NPT to use 1GB pages when pfn was added as 2MB
+	 * in the RMP table.
+	 */
+	if (rmp_level == PG_LEVEL_2M && (*level == PG_LEVEL_1G))
+		goto out;
+
+out_adjust:
+	/* Adjust the level to keep the NPT and RMP in sync */
+	*level = min_t(size_t, *level, rmp_level);
+out:
+	put_page(pfn_to_page(pfn));
+	pr_debug("%s: GFN: 0x%llx, level: %d, rmp_level: %d, ret: %d\n",
+		 __func__, gfn, *level, rmp_level, ret);
+}
+
 int sev_fault_is_private(struct kvm *kvm, gpa_t gpa, u64 error_code, bool *private_fault)
 {
 	gfn_t gfn = gpa_to_gfn(gpa);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 013f811c733c..2dfa150bcb09 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4843,6 +4843,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
 	.alloc_apic_backing_page = svm_alloc_apic_backing_page,
 
 	.fault_is_private = sev_fault_is_private,
+
+	.rmp_page_level_adjust = sev_rmp_page_level_adjust,
 };
 
 /*
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 17200c1ad20e..ae733188cf87 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -698,6 +698,7 @@ void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
 void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa);
 void sev_es_unmap_ghcb(struct vcpu_svm *svm);
 struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu);
+void sev_rmp_page_level_adjust(struct kvm *kvm, gfn_t gfn, int *level);
 int sev_fault_is_private(struct kvm *kvm, gpa_t gpa, u64 error_code, bool *private_fault);
 
-- 
2.25.1