While the direct MMU can handle page faults much faster than the
existing implementation, it cannot handle faults caused by write
protection or access tracking as quickly. Add a fast path, similar to
the existing fast_page_fault, which handles these cases without taking
the MMU read lock or calling get_user_pages.

Signed-off-by: Ben Gardon <bgardon@xxxxxxxxxx>
---
 arch/x86/kvm/mmu.c | 93 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 92 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index f3a26a32c8174..3d4a78f2461a9 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4490,6 +4490,93 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
 	return fault_handled;
 }
 
+/*
+ * Attempt to handle a page fault without the use of get_user_pages or
+ * acquiring the MMU lock. This function can handle page faults resulting from
+ * missing permissions on a PTE, set up by KVM for dirty logging or access
+ * tracking.
+ *
+ * Return value:
+ * - true: The page fault may have been fixed by this function. Let the vCPU
+ *	   retry the access at the same address.
+ * - false: This function cannot handle the page fault. Let the full page fault
+ *	    path fix it.
+ */
+static bool fast_direct_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, int level,
+				   u32 error_code)
+{
+	struct direct_walk_iterator iter;
+	bool fault_handled = false;
+	bool remove_write_prot;
+	bool remove_acc_track;
+	u64 new_pte;
+
+	if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
+		return false;
+
+	if (!page_fault_can_be_fast(error_code))
+		return false;
+
+	direct_walk_iterator_setup_walk(&iter, vcpu->kvm,
+			kvm_arch_vcpu_memslots_id(vcpu), gpa >> PAGE_SHIFT,
+			(gpa >> PAGE_SHIFT) + 1, MMU_NO_LOCK);
+	while (direct_walk_iterator_next_present_leaf_pte(&iter)) {
+		remove_write_prot = (error_code & PFERR_WRITE_MASK);
+		remove_write_prot &= !(iter.old_pte & PT_WRITABLE_MASK);
+		remove_write_prot &= spte_can_locklessly_be_made_writable(
+				iter.old_pte);
+
+		remove_acc_track = is_access_track_spte(iter.old_pte);
+
+		/* Verify that the fault can be handled in the fast path */
+		if (!remove_acc_track && !remove_write_prot)
+			break;
+
+		/*
+		 * If dirty logging is enabled:
+		 *
+		 * Do not fix write-permission on a large spte, since we only
+		 * dirty the first page into the dirty-bitmap in
+		 * fast_pf_fix_direct_spte(), which means other pages are
+		 * missed if the slot is dirty-logged.
+		 *
+		 * Instead, we let the slow page fault path create a normal spte
+		 * to fix the access.
+		 *
+		 * See the comments in kvm_arch_commit_memory_region().
+		 */
+		if (remove_write_prot &&
+		    iter.level > PT_PAGE_TABLE_LEVEL)
+			break;
+
+		new_pte = iter.old_pte;
+		if (remove_acc_track)
+			new_pte = restore_acc_track_spte(iter.old_pte);
+		if (remove_write_prot)
+			new_pte |= PT_WRITABLE_MASK;
+
+		if (new_pte == iter.old_pte) {
+			fault_handled = true;
+			break;
+		}
+
+		if (!direct_walk_iterator_set_pte(&iter, new_pte))
+			continue;
+
+		if (remove_write_prot)
+			kvm_vcpu_mark_page_dirty(vcpu, iter.pte_gfn_start);
+
+		fault_handled = true;
+		break;
+	}
+	direct_walk_iterator_end_traversal(&iter);
+
+	trace_fast_page_fault(vcpu, gpa, error_code, iter.ptep,
+			      iter.old_pte, fault_handled);
+
+	return fault_handled;
+}
+
 static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
 			 gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable);
 static int make_mmu_pages_available(struct kvm_vcpu *vcpu);
@@ -5182,9 +5269,13 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
 		gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1);
 	}
 
-	if (!vcpu->kvm->arch.direct_mmu_enabled)
+	if (vcpu->kvm->arch.direct_mmu_enabled) {
+		if (fast_direct_page_fault(vcpu, gpa, level, error_code))
+			return RET_PF_RETRY;
+	} else {
 		if (fast_page_fault(vcpu, gpa, level, error_code))
 			return RET_PF_RETRY;
+	}
 
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
-- 
2.23.0.444.g18eeb5a265-goog
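
Illustrative note, not part of the patch: for readers new to the lockless
fixup pattern fast_direct_page_fault() follows, the stand-alone user-space
sketch below shows the same idea: read the PTE, compute a fixed-up value
(clear the access-track marker, restore write permission when the spte can
be made writable locklessly), and publish it with a compare-and-exchange,
backing off if the PTE changed underneath. It assumes
direct_walk_iterator_set_pte() resolves races with such a cmpxchg; the
FAKE_* masks and fake_* helpers are invented for illustration and are not
KVM's.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Invented stand-ins for KVM's spte bits; the real masks differ. */
#define FAKE_WRITABLE		(1ull << 1)	/* PT_WRITABLE_MASK stand-in */
#define FAKE_HOST_WRITABLE	(1ull << 57)	/* "can be made writable" stand-in */
#define FAKE_ACC_TRACK		(1ull << 62)	/* access-track marker stand-in */

/* Try to fix a write-protected or access-tracked PTE without a lock. */
static bool fake_fast_fix(_Atomic uint64_t *ptep, bool write_fault)
{
	uint64_t old_pte = atomic_load(ptep);
	uint64_t new_pte = old_pte;
	bool remove_write_prot, remove_acc_track;

	remove_write_prot = write_fault && !(old_pte & FAKE_WRITABLE) &&
			    (old_pte & FAKE_HOST_WRITABLE);
	remove_acc_track = old_pte & FAKE_ACC_TRACK;

	/* Nothing the fast path can do; let the slow path handle it. */
	if (!remove_write_prot && !remove_acc_track)
		return false;

	if (remove_acc_track)
		new_pte &= ~FAKE_ACC_TRACK;	/* restore_acc_track_spte() stand-in */
	if (remove_write_prot)
		new_pte |= FAKE_WRITABLE;

	if (new_pte == old_pte)
		return true;	/* another thread already fixed it */

	/*
	 * The cmpxchg plays the role this sketch assumes for
	 * direct_walk_iterator_set_pte(): if the PTE changed under us,
	 * report failure so the caller can retry or take the slow path.
	 */
	return atomic_compare_exchange_strong(ptep, &old_pte, new_pte);
}

int main(void)
{
	_Atomic uint64_t pte = FAKE_HOST_WRITABLE | FAKE_ACC_TRACK;
	bool fixed = fake_fast_fix(&pte, true);

	printf("fixed: %d, writable now: %d\n",
	       fixed, !!(atomic_load(&pte) & FAKE_WRITABLE));
	return 0;
}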