On Thu, Feb 17, 2022, Paolo Bonzini wrote:
> Right now, PGD caching avoids placing a PAE root in the cache by using the
> old value of mmu->root_level and mmu->shadow_root_level; it does not look
> for a cached PGD if the old root is a PAE one, and then frees it using
> kvm_mmu_free_roots.
>
> Change the logic instead to free the uncacheable root early.
> This way, __kvm_mmu_new_pgd is able to look up the cache when going from
> 32-bit to 64-bit (if there is a hit, the invalid root becomes the least
> recently used).  An example of this is nested virtualization with shadow
> paging, when a 64-bit L1 runs a 32-bit L2.
>
> As a side effect (which is actually the reason why this patch was
> written), PGD caching does not use the old value of mmu->root_level
> and mmu->shadow_root_level anymore.

Maybe another blurb on 5=>4-level nNPT being broken?  I'm also OK with omitting it.

> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> ---

Nits aside,

Reviewed-by: Sean Christopherson <seanjc@xxxxxxxxxx>

> +static bool cached_root_find_and_keep_current(struct kvm *kvm, struct kvm_mmu *mmu,
> +					      gpa_t new_pgd,
> +					      union kvm_mmu_page_role new_role)
>  {
>  	uint i;
> -	struct kvm_mmu *mmu = vcpu->arch.mmu;
>
>  	if (is_root_usable(&mmu->root, new_pgd, new_role))
>  		return true;
>
>  	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
> +		/*
> +		 * The swaps end up rotating the cache like this:
> +		 *   C   0 1 2 3   (on entry to the function)
> +		 *   0   C 1 2 3
> +		 *   1   C 0 2 3
> +		 *   2   C 0 1 3
> +		 *   3   C 0 1 2   (on exit from the loop)
> +		 */
>  		swap(mmu->root, mmu->prev_roots[i]);
> -

I'd prefer we keep this whitespace; I like that it separates the swap() and its
comment from the usability check.
>  		if (is_root_usable(&mmu->root, new_pgd, new_role))
> -			break;
> +			return true;
>  	}
>
> -	return i < KVM_MMU_NUM_PREV_ROOTS;
> +	kvm_mmu_free_roots(kvm, mmu, KVM_MMU_ROOT_CURRENT);
> +	return false;
>  }
>
> -static bool fast_pgd_switch(struct kvm_vcpu *vcpu, gpa_t new_pgd,
> -			    union kvm_mmu_page_role new_role)
> +/*
> + * Find out if a previously cached root matching the new pgd/role is available.
> + * On entry, mmu->root is invalid.
> + * If a matching root is found, it is assigned to kvm_mmu->root, the LRU entry
> + * of the cache becomes invalid, and true is returned.
> + * If no match is found, kvm_mmu->root is left invalid and false is returned.
> + */
> +static bool cached_root_find_without_current(struct kvm *kvm, struct kvm_mmu *mmu,
> +					     gpa_t new_pgd,
> +					     union kvm_mmu_page_role new_role)
>  {
> -	struct kvm_mmu *mmu = vcpu->arch.mmu;
> +	uint i;
> +
> +	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
> +		if (is_root_usable(&mmu->prev_roots[i], new_pgd, new_role))
> +			goto hit;

The for-loop needs curly braces.

>
> +	return false;
> +
> +hit:
> +	swap(mmu->root, mmu->prev_roots[i]);
> +	/* Bubble up the remaining roots.  */
> +	for (; i < KVM_MMU_NUM_PREV_ROOTS - 1; i++)
> +		mmu->prev_roots[i] = mmu->prev_roots[i + 1];
> +	mmu->prev_roots[i].hpa = INVALID_PAGE;
> +	return true;
> +}
> +
> +static bool fast_pgd_switch(struct kvm *kvm, struct kvm_mmu *mmu,
> +			    gpa_t new_pgd, union kvm_mmu_page_role new_role)
> +{
>  	/*
> -	 * For now, limit the fast switch to 64-bit hosts+VMs in order to avoid
> +	 * For now, limit the caching to 64-bit hosts+VMs in order to avoid
>  	 * having to deal with PDPTEs. We may add support for 32-bit hosts/VMs
>  	 * later if necessary.
>  	 */
> -	if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL &&
> -	    mmu->root_level >= PT64_ROOT_4LEVEL)
> -		return cached_root_available(vcpu, new_pgd, new_role);
> +	if (VALID_PAGE(mmu->root.hpa) && !to_shadow_page(mmu->root.hpa))
> +		kvm_mmu_free_roots(kvm, mmu, KVM_MMU_ROOT_CURRENT);
>
> -	return false;
> +	if (VALID_PAGE(mmu->root.hpa))
> +		return cached_root_find_and_keep_current(kvm, mmu, new_pgd, new_role);
> +	else
> +		return cached_root_find_without_current(kvm, mmu, new_pgd, new_role);
>  }
>
>  static void __kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd,
> @@ -4160,8 +4196,8 @@ static void __kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd,
>  {
>  	struct kvm_mmu *mmu = vcpu->arch.mmu;
>
> -	if (!fast_pgd_switch(vcpu, new_pgd, new_role)) {
> -		kvm_mmu_free_roots(vcpu->kvm, mmu, KVM_MMU_ROOT_CURRENT);
> +	if (!fast_pgd_switch(vcpu->kvm, mmu, new_pgd, new_role)) {
> +		/* kvm_mmu_ensure_valid_pgd will set up a new root.  */

The "kvm_mmu_ensure_valid_pgd" part is stale due to the bikeshedding stalemate.
Maybe reference vcpu_enter_guest() instead?  E.g.

	/*
	 * If no usable root is found there's nothing more to do, a new root
	 * will be set up during vcpu_enter_guest(), prior to the next VM-Enter.
	 */
	if (!fast_pgd_switch(vcpu->kvm, mmu, new_pgd, new_role))
		return;