On Thu, Apr 14, 2022 at 5:51 PM Sean Christopherson <seanjc@xxxxxxxxxx> wrote: > > Add RET_PF_CONTINUE and use it in handle_abnormal_pfn() and > kvm_faultin_pfn() to signal that the page fault handler should continue > doing its thing. Aside from being gross and inefficient, using a boolean > return to signal continue vs. stop makes it extremely difficult to add > more helpers and/or move existing code to a helper. > > E.g. hypothetically, if nested MMUs were to gain a separate page fault > handler in the future, everything up to the "is self-modifying PTE" check > can be shared by all shadow MMUs, but communicating up the stack whether > to continue on or stop becomes a nightmare. > > More concretely, proposed support for private guest memory ran into a > similar issue, where it'll be forced to forego a helper in order to yield > sane code: https://lore.kernel.org/all/YkJbxiL%2FAz7olWlq@xxxxxxxxxx. > > No functional change intended. > > Cc: David Matlack <dmatlack@xxxxxxxxxx> > Cc: Chao Peng <chao.p.peng@xxxxxxxxxxxxxxx> > Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx> > --- > arch/x86/kvm/mmu/mmu.c | 51 ++++++++++++++------------------- > arch/x86/kvm/mmu/mmu_internal.h | 9 +++++- > arch/x86/kvm/mmu/paging_tmpl.h | 6 ++-- > 3 files changed, 34 insertions(+), 32 deletions(-) > > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c > index 69a30d6d1e2b..cb2982c6b513 100644 > --- a/arch/x86/kvm/mmu/mmu.c > +++ b/arch/x86/kvm/mmu/mmu.c > @@ -2972,14 +2972,12 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn) > return -EFAULT; > } > > -static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, > - unsigned int access, int *ret_val) > +static int handle_abnormal_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, > + unsigned int access) > { > /* The pfn is invalid, report the error! */ > - if (unlikely(is_error_pfn(fault->pfn))) { > - *ret_val = kvm_handle_bad_page(vcpu, fault->gfn, fault->pfn); > - return true; > - } > + if (unlikely(is_error_pfn(fault->pfn))) > + return kvm_handle_bad_page(vcpu, fault->gfn, fault->pfn); > > if (unlikely(!fault->slot)) { > gva_t gva = fault->is_tdp ? 0 : fault->addr; > @@ -2991,13 +2989,11 @@ static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fa > * touching the shadow page tables as attempting to install an > * MMIO SPTE will just be an expensive nop. > */ > - if (unlikely(!shadow_mmio_value)) { > - *ret_val = RET_PF_EMULATE; > - return true; > - } > + if (unlikely(!shadow_mmio_value)) > + return RET_PF_EMULATE; > } > > - return false; > + return RET_PF_CONTINUE; > } > > static bool page_fault_can_be_fast(struct kvm_page_fault *fault) > @@ -3888,7 +3884,7 @@ static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, > kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch); > } > > -static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, int *r) > +static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) > { > struct kvm_memory_slot *slot = fault->slot; > bool async; > @@ -3899,7 +3895,7 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, > * be zapped before KVM inserts a new MMIO SPTE for the gfn. > */ > if (slot && (slot->flags & KVM_MEMSLOT_INVALID)) > - goto out_retry; > + return RET_PF_RETRY; > > if (!kvm_is_visible_memslot(slot)) { > /* Don't expose private memslots to L2. */ > @@ -3907,7 +3903,7 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, > fault->slot = NULL; > fault->pfn = KVM_PFN_NOSLOT; > fault->map_writable = false; > - return false; > + return RET_PF_CONTINUE; > } > /* > * If the APIC access page exists but is disabled, go directly > @@ -3916,10 +3912,8 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, > * when the AVIC is re-enabled. > */ > if (slot && slot->id == APIC_ACCESS_PAGE_PRIVATE_MEMSLOT && > - !kvm_apicv_activated(vcpu->kvm)) { > - *r = RET_PF_EMULATE; > - return true; > - } > + !kvm_apicv_activated(vcpu->kvm)) > + return RET_PF_EMULATE; > } > > async = false; > @@ -3927,26 +3921,23 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, > fault->write, &fault->map_writable, > &fault->hva); > if (!async) > - return false; /* *pfn has correct page already */ > + return RET_PF_CONTINUE; /* *pfn has correct page already */ > > if (!fault->prefetch && kvm_can_do_async_pf(vcpu)) { > trace_kvm_try_async_get_page(fault->addr, fault->gfn); > if (kvm_find_async_pf_gfn(vcpu, fault->gfn)) { > trace_kvm_async_pf_doublefault(fault->addr, fault->gfn); > kvm_make_request(KVM_REQ_APF_HALT, vcpu); > - goto out_retry; > - } else if (kvm_arch_setup_async_pf(vcpu, fault->addr, fault->gfn)) > - goto out_retry; > + return RET_PF_RETRY; > + } else if (kvm_arch_setup_async_pf(vcpu, fault->addr, fault->gfn)) { > + return RET_PF_RETRY; > + } > } > > fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, NULL, > fault->write, &fault->map_writable, > &fault->hva); > - return false; > - > -out_retry: > - *r = RET_PF_RETRY; > - return true; > + return RET_PF_CONTINUE; > } > > /* > @@ -4001,10 +3992,12 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault > mmu_seq = vcpu->kvm->mmu_notifier_seq; > smp_rmb(); > > - if (kvm_faultin_pfn(vcpu, fault, &r)) > + r = kvm_faultin_pfn(vcpu, fault); > + if (r != RET_PF_CONTINUE) > return r; > > - if (handle_abnormal_pfn(vcpu, fault, ACC_ALL, &r)) > + r = handle_abnormal_pfn(vcpu, fault, ACC_ALL); > + if (r != RET_PF_CONTINUE) > return r; > > r = RET_PF_RETRY; > diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h > index 1bff453f7cbe..c0e502b17ef7 100644 > --- a/arch/x86/kvm/mmu/mmu_internal.h > +++ b/arch/x86/kvm/mmu/mmu_internal.h > @@ -143,6 +143,7 @@ unsigned int pte_list_count(struct kvm_rmap_head *rmap_head); > /* > * Return values of handle_mmio_page_fault, mmu.page_fault, and fast_page_fault(). > * > + * RET_PF_CONTINUE: So far, so good, keep handling the page fault. > * RET_PF_RETRY: let CPU fault again on the address. > * RET_PF_EMULATE: mmio page fault, emulate the instruction directly. > * RET_PF_INVALID: the spte is invalid, let the real page fault path update it. RET_PF_CONTINUE and RET_PF_INVALID are pretty similar, they both indicate to the PF handler that it should keep going. What do you think about taking this a step further and removing RET_PF_INVALID and RET_PF_CONTINUE and using 0 instead? That way we can replace: if (ret != RET_PF_CONTINUE) return r; and if (ret != RET_PF_INVALID) return r; with if (r) return r; ? > @@ -151,9 +152,15 @@ unsigned int pte_list_count(struct kvm_rmap_head *rmap_head); > * > * Any names added to this enum should be exported to userspace for use in > * tracepoints via TRACE_DEFINE_ENUM() in mmutrace.h Please add RET_PF_CONTINUE to mmutrace.h, e.g. TRACE_DEFINE_ENUM(RET_PF_CONTINUE); It doesn't matter in practice (yet) since the trace enums are only used for trace_fast_page_fault, which does not return RET_PF_CONTINUE. But it would be good to keep it up to date. > + * > + * Note, all values must be greater than or equal to zero so as not to encroach > + * on -errno return values. Somewhat arbitrarily use '0' for CONTINUE, which > + * will allow for efficient machine code when checking for CONTINUE, e.g. > + * "TEST %rax, %rax, JNZ", as all "stop!" values are non-zero. > */ > enum { > - RET_PF_RETRY = 0, > + RET_PF_CONTINUE = 0, > + RET_PF_RETRY, > RET_PF_EMULATE, > RET_PF_INVALID, > RET_PF_FIXED, > diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h > index 66f1acf153c4..7038273d04ab 100644 > --- a/arch/x86/kvm/mmu/paging_tmpl.h > +++ b/arch/x86/kvm/mmu/paging_tmpl.h > @@ -838,10 +838,12 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault > mmu_seq = vcpu->kvm->mmu_notifier_seq; > smp_rmb(); > > - if (kvm_faultin_pfn(vcpu, fault, &r)) > + r = kvm_faultin_pfn(vcpu, fault); > + if (r != RET_PF_CONTINUE) > return r; > > - if (handle_abnormal_pfn(vcpu, fault, walker.pte_access, &r)) > + r = handle_abnormal_pfn(vcpu, fault, walker.pte_access); > + if (r != RET_PF_CONTINUE) > return r; > > /* > > base-commit: 150866cd0ec871c765181d145aa0912628289c8a > -- > 2.36.0.rc0.470.gd361397f0d-goog >