From: Isaku Yamahata <isaku.yamahata@xxxxxxxxx> Introduce a KVM fault type to indicate how to handle a KVM page fault. It is unfortunate and inflexible for kvm_mmu_do_page_fault() to call kvm_mem_is_private(), eventually looking up memory attributes. Later __kvm_faultin_pfn() looks up memory attributes again. There is a race condition in that other threads can change the memory attributes, because the mmu lock is not held. SEV-SNP and TDX define their own way to indicate that the page fault is private. Add a KVM fault type, add mmu_private_fault_mask to struct kvm_arch for SNP to determine whether the fault is private, and add gfn_shared_mask to struct kvm_arch for TDX to determine whether the fault is private. KVM_FAULT_SHARED_ALWAYS is added for the conventional guest to avoid the overhead of looking up memory attributes. Suggested-by: Michael Roth <michael.roth@xxxxxxx> Signed-off-by: Isaku Yamahata <isaku.yamahata@xxxxxxxxx> --- Changes v1 -> v2: - Introduced fault type and replaced is_private with fault_type. - Add kvm_get_fault_type() to encapsulate the difference. --- arch/x86/include/asm/kvm_host.h | 6 ++++++ arch/x86/kvm/mmu/mmu.c | 26 ++++++++++++++++++++------ arch/x86/kvm/mmu/mmu_internal.h | 33 +++++++++++++++++++++++++++++++-- 3 files changed, 57 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 8ae131dc645d..5afeefc7a516 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1445,6 +1445,12 @@ struct kvm_arch { */ #define SPLIT_DESC_CACHE_MIN_NR_OBJECTS (SPTE_ENT_PER_PAGE + 1) struct kvm_mmu_memory_cache split_desc_cache; + +#ifdef CONFIG_KVM_PROTECTED_VM + /* To make the patch compile. 
*/ + u64 mmu_private_fault_mask; + gfn_t gfn_shared_mask; +#endif }; struct kvm_vm_stat { diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index b8ba7f11c3cb..feec75515f39 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3174,10 +3174,12 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn, static int __kvm_mmu_max_mapping_level(struct kvm *kvm, const struct kvm_memory_slot *slot, - gfn_t gfn, int max_level, bool is_private) + gfn_t gfn, int max_level, + enum kvm_fault_type fault_type) { struct kvm_lpage_info *linfo; int host_level; + bool is_private = fault_type == KVM_FAULT_PRIVATE; max_level = min(max_level, max_huge_page_level); for ( ; max_level > PG_LEVEL_4K; max_level--) { @@ -3228,7 +3230,7 @@ void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault */ fault->req_level = __kvm_mmu_max_mapping_level(vcpu->kvm, slot, fault->gfn, fault->max_level, - fault->is_private); + fault->fault_type); if (fault->req_level == PG_LEVEL_4K || fault->huge_page_disallowed) return; @@ -4328,7 +4330,7 @@ static int kvm_do_memory_fault_exit(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { vcpu->run->exit_reason = KVM_EXIT_MEMORY_FAULT; - if (fault->is_private) + if (fault->fault_type == KVM_FAULT_PRIVATE) vcpu->run->memory.flags = KVM_MEMORY_EXIT_FLAG_PRIVATE; else vcpu->run->memory.flags = 0; @@ -4386,10 +4388,22 @@ static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault return RET_PF_EMULATE; } - if (fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn)) - return kvm_do_memory_fault_exit(vcpu, fault); + if (fault->fault_type == KVM_FAULT_SHARED_ALWAYS) { + /* + * The conventional case. Don't lookup memory attributes to + * avoid overhead + */ + fault->fault_type = KVM_FAULT_SHARED; + } else if (fault->fault_type == KVM_FAULT_MEM_ATTR) { + fault->fault_type = kvm_mem_is_private(vcpu->kvm, fault->gfn) ? 
+ KVM_FAULT_PRIVATE : KVM_FAULT_SHARED; + } else { + if ((fault->fault_type == KVM_FAULT_PRIVATE) != + kvm_mem_is_private(vcpu->kvm, fault->gfn)) + return kvm_do_memory_fault_exit(vcpu, fault); + } - if (fault->is_private) + if (fault->fault_type == KVM_FAULT_PRIVATE) return kvm_faultin_pfn_private(vcpu, fault); async = false; diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index 7f9ec1e5b136..0ec0b927a391 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -188,6 +188,13 @@ static inline bool is_nx_huge_page_enabled(struct kvm *kvm) return READ_ONCE(nx_huge_pages) && !kvm->arch.disable_nx_huge_pages; } +enum kvm_fault_type { + KVM_FAULT_MEM_ATTR, + KVM_FAULT_SHARED, + KVM_FAULT_SHARED_ALWAYS, + KVM_FAULT_PRIVATE, +}; + struct kvm_page_fault { /* arguments to kvm_mmu_do_page_fault. */ const gpa_t addr; @@ -203,9 +210,10 @@ struct kvm_page_fault { /* Derived from mmu and global state. */ const bool is_tdp; - const bool is_private; const bool nx_huge_page_workaround_enabled; + enum kvm_fault_type fault_type; + /* * Whether a >4KB mapping can be created or is forbidden due to NX * hugepages. @@ -282,6 +290,27 @@ enum { RET_PF_SPURIOUS, }; +static inline enum kvm_fault_type kvm_get_fault_type(struct kvm *kvm, + gpa_t gpa, u64 err) +{ + +#ifdef CONFIG_KVM_PROTECTED_VM + /* SEV-SNP handling */ + if (kvm->arch.mmu_private_fault_mask) + return (err & kvm->arch.mmu_private_fault_mask) ? + KVM_FAULT_PRIVATE : KVM_FAULT_SHARED; + + /* TDX handling */ + if (kvm->arch.gfn_shared_mask) + return (gpa_to_gfn(gpa) & kvm->arch.gfn_shared_mask) ? + KVM_FAULT_SHARED : KVM_FAULT_PRIVATE; +#endif + if (kvm->arch.vm_type == KVM_X86_PROTECTED_VM) + return KVM_FAULT_MEM_ATTR; + /* Don't query memory attributes. 
*/ + return KVM_FAULT_SHARED_ALWAYS; +} + static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 err, bool prefetch, int *emulation_type) { @@ -301,7 +330,7 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, .max_level = KVM_MAX_HUGEPAGE_LEVEL, .req_level = PG_LEVEL_4K, .goal_level = PG_LEVEL_4K, - .is_private = kvm_mem_is_private(vcpu->kvm, cr2_or_gpa >> PAGE_SHIFT), + .fault_type = kvm_get_fault_type(vcpu->kvm, cr2_or_gpa, err), }; int r; -- 2.25.1