On Wed, Nov 16, 2022 at 10:13:07PM +0000, Sean Christopherson wrote: > On Wed, Nov 16, 2022, Ackerley Tng wrote: > > >@@ -4173,6 +4203,22 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) > > > return RET_PF_EMULATE; > > > } > > > > > >+ if (kvm_slot_can_be_private(slot) && > > >+ fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn)) { > > >+ vcpu->run->exit_reason = KVM_EXIT_MEMORY_FAULT; > > >+ if (fault->is_private) > > >+ vcpu->run->memory.flags = KVM_MEMORY_EXIT_FLAG_PRIVATE; > > >+ else > > >+ vcpu->run->memory.flags = 0; > > >+ vcpu->run->memory.padding = 0; > > >+ vcpu->run->memory.gpa = fault->gfn << PAGE_SHIFT; > > >+ vcpu->run->memory.size = PAGE_SIZE; > > >+ return RET_PF_USER; > > >+ } > > >+ > > >+ if (fault->is_private) > > >+ return kvm_faultin_pfn_private(fault); > > >+ > > > > Since each memslot may also not be backed by restricted memory, we > > should also check if the memslot has been set up for private memory > > with > > > > if (fault->is_private && kvm_slot_can_be_private(slot)) > > return kvm_faultin_pfn_private(fault); > > > > Without this check, restrictedmem_get_page will get called with NULL > > in slot->restricted_file, which causes a NULL pointer dereference. > > Hmm, silently skipping the faultin would result in KVM faulting in the shared > portion of the memslot, and I believe would end up mapping that pfn as private, > i.e. would map a non-UPM PFN as a private mapping. For TDX and SNP, that would > be double ungood as it would let the host access memory that is mapped private, > i.e. lead to #MC or #PF(RMP) in the host. That's correct. > > I believe the correct solution is to drop the "can be private" check from the > above check, and instead handle that in kvm_faultin_pfn_private(). That would fix > another bug, e.g. if the fault is shared, the slot can't be private, but for > whatever reason userspace marked the gfn as private. 
Even though KVM might be > able to service the fault, the correct thing to do in that case is to exit to userspace. It makes sense to me. Chao > > E.g. > > --- > arch/x86/kvm/mmu/mmu.c | 36 ++++++++++++++++++++++-------------- > 1 file changed, 22 insertions(+), 14 deletions(-) > > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c > index 10017a9f26ee..e2ac8873938e 100644 > --- a/arch/x86/kvm/mmu/mmu.c > +++ b/arch/x86/kvm/mmu/mmu.c > @@ -4158,11 +4158,29 @@ static inline u8 order_to_level(int order) > return PG_LEVEL_4K; > } > > -static int kvm_faultin_pfn_private(struct kvm_page_fault *fault) > +static int kvm_do_memory_fault_exit(struct kvm_vcpu *vcpu, > + struct kvm_page_fault *fault) > +{ > + vcpu->run->exit_reason = KVM_EXIT_MEMORY_FAULT; > + if (fault->is_private) > + vcpu->run->memory.flags = KVM_MEMORY_EXIT_FLAG_PRIVATE; > + else > + vcpu->run->memory.flags = 0; > + vcpu->run->memory.padding = 0; > + vcpu->run->memory.gpa = fault->gfn << PAGE_SHIFT; > + vcpu->run->memory.size = PAGE_SIZE; > + return RET_PF_USER; > +} > + > +static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu, > + struct kvm_page_fault *fault) > { > int order; > struct kvm_memory_slot *slot = fault->slot; > > + if (!kvm_slot_can_be_private(slot)) > + return kvm_do_memory_fault_exit(vcpu, fault); > + > if (kvm_restricted_mem_get_pfn(slot, fault->gfn, &fault->pfn, &order)) > return RET_PF_RETRY; > > @@ -4203,21 +4221,11 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) > return RET_PF_EMULATE; > } > > - if (kvm_slot_can_be_private(slot) && > - fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn)) { > - vcpu->run->exit_reason = KVM_EXIT_MEMORY_FAULT; > - if (fault->is_private) > - vcpu->run->memory.flags = KVM_MEMORY_EXIT_FLAG_PRIVATE; > - else > - vcpu->run->memory.flags = 0; > - vcpu->run->memory.padding = 0; > - vcpu->run->memory.gpa = fault->gfn << PAGE_SHIFT; > - vcpu->run->memory.size = PAGE_SIZE; > - return RET_PF_USER; > - } > + if 
(fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn)) > + return kvm_do_memory_fault_exit(vcpu, fault); > > if (fault->is_private) > - return kvm_faultin_pfn_private(fault); > + return kvm_faultin_pfn_private(vcpu, fault); > > async = false; > fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, &async, > > base-commit: 969d761bb7b8654605937f31ae76123dcb7f15a3 > --