Adds a function, built on the paging structure iterator, for zapping
ranges of GFNs in an address space, and uses it to support
invalidate_zap_all_pages for the direct MMU.

Signed-off-by: Ben Gardon <bgardon@xxxxxxxxxx>
---
 arch/x86/kvm/mmu.c | 69 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 66 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 234db5f4246a4..f0696658b527c 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2120,7 +2120,6 @@ static void direct_walk_iterator_reset_traversal(
  * range, so the last gfn to be interated over would be the largest possible
  * GFN, in this scenario.)
  */
-__attribute__((unused))
 static void direct_walk_iterator_setup_walk(struct direct_walk_iterator *iter,
 		struct kvm *kvm, int as_id, gfn_t start, gfn_t end,
 		enum mmu_lock_mode lock_mode)
@@ -2151,7 +2150,6 @@ static void direct_walk_iterator_setup_walk(struct direct_walk_iterator *iter,
 	direct_walk_iterator_start_traversal(iter);
 }
 
-__attribute__((unused))
 static void direct_walk_iterator_retry_pte(struct direct_walk_iterator *iter)
 {
 	BUG_ON(!iter->walk_in_progress);
@@ -2397,7 +2395,6 @@ static bool cmpxchg_pte(u64 *ptep, u64 old_pte, u64 new_pte, int level, u64 gfn)
 	return r == old_pte;
 }
 
-__attribute__((unused))
 static bool direct_walk_iterator_set_pte(struct direct_walk_iterator *iter,
 					 u64 new_pte)
 {
@@ -2725,6 +2722,44 @@ static int kvm_handle_hva_range(struct kvm *kvm,
 	return ret;
 }
 
+/*
+ * Marks the range of gfns, [start, end), non-present.
+ */
+static bool zap_direct_gfn_range(struct kvm *kvm, int as_id, gfn_t start,
+				 gfn_t end, enum mmu_lock_mode lock_mode)
+{
+	struct direct_walk_iterator iter;
+
+	direct_walk_iterator_setup_walk(&iter, kvm, as_id, start, end,
+					lock_mode);
+	while (direct_walk_iterator_next_present_pte(&iter)) {
+		/*
+		 * The gfn range should be handled at the largest granularity
+		 * possible, however since the functions which handle changed
+		 * PTEs (and freeing child PTs) will not yield, zapping an
+		 * entry with too many child PTEs can lead to scheduler
+		 * problems. In order to avoid scheduler problems, only zap
+		 * PTEs at PDPE level and lower. The root level entries will be
+		 * zapped and the high level page table pages freed on VM
+		 * teardown.
+		 */
+		if ((iter.pte_gfn_start < start ||
+		     iter.pte_gfn_end > end ||
+		     iter.level > PT_PDPE_LEVEL) &&
+		    !is_last_spte(iter.old_pte, iter.level))
+			continue;
+
+		/*
+		 * If the compare / exchange succeeds, then we will continue on
+		 * to the next pte. If it fails, the next iteration will repeat
+		 * the current pte. We'll handle both cases in the same way, so
+		 * we don't need to check the result here.
+		 */
+		direct_walk_iterator_set_pte(&iter, 0);
+	}
+	return direct_walk_iterator_end_traversal(&iter);
+}
+
 static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 			  unsigned long data,
 			  int (*handler)(struct kvm *kvm,
@@ -6645,11 +6680,26 @@ static void kvm_zap_obsolete_pages(struct kvm *kvm)
  */
 static void kvm_mmu_zap_all_fast(struct kvm *kvm)
 {
+	int i;
+
 	lockdep_assert_held(&kvm->slots_lock);
 
 	write_lock(&kvm->mmu_lock);
 	trace_kvm_mmu_zap_all_fast(kvm);
 
+	/* Zap all direct MMU PTEs slowly */
+	if (kvm->arch.direct_mmu_enabled) {
+		for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
+			zap_direct_gfn_range(kvm, i, 0, ~0ULL,
+					MMU_WRITE_LOCK | MMU_LOCK_MAY_RESCHED);
+	}
+
+	if (kvm->arch.pure_direct_mmu) {
+		kvm_flush_remote_tlbs(kvm);
+		write_unlock(&kvm->mmu_lock);
+		return;
+	}
+
 	/*
 	 * Toggle mmu_valid_gen between '0' and '1'. Because slots_lock is
 	 * held for the entire duration of zapping obsolete pages, it's
@@ -6888,8 +6938,21 @@ void kvm_mmu_zap_all(struct kvm *kvm)
 	struct kvm_mmu_page *sp, *node;
 	LIST_HEAD(invalid_list);
 	int ign;
+	int i;
 
 	write_lock(&kvm->mmu_lock);
+	if (kvm->arch.direct_mmu_enabled) {
+		for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
+			zap_direct_gfn_range(kvm, i, 0, ~0ULL,
+					MMU_WRITE_LOCK | MMU_LOCK_MAY_RESCHED);
+		kvm_flush_remote_tlbs(kvm);
+	}
+
+	if (kvm->arch.pure_direct_mmu) {
+		write_unlock(&kvm->mmu_lock);
+		return;
+	}
+
 restart:
 	list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) {
 		if (sp->role.invalid && sp->root_count)
-- 
2.23.0.444.g18eeb5a265-goog
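
P.S. For readers new to the iterator's retry contract relied on in
zap_direct_gfn_range(): below is a minimal, self-contained userspace
sketch of that pattern, not kernel code. All names in it
(toy_pte_table, toy_zap_range, PTE_PRESENT, TOY_PTES) are hypothetical,
and C11 stdatomic stands in for the kernel's cmpxchg; it only models
the "failed compare/exchange means re-read and retry the same entry"
behavior that the direct walk iterator provides internally.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define TOY_PTES	8
#define PTE_PRESENT	0x1ULL

static _Atomic uint64_t toy_pte_table[TOY_PTES];

/* Mark every present entry in [start, end) non-present (zero). */
static bool toy_zap_range(unsigned long start, unsigned long end)
{
	bool zapped_any = false;
	unsigned long i = start;

	while (i < end) {
		uint64_t old_pte = atomic_load(&toy_pte_table[i]);

		if (!(old_pte & PTE_PRESENT)) {
			i++;		/* not present, just advance */
			continue;
		}

		/*
		 * On success, move to the next entry; on failure another
		 * thread changed the PTE under us, so loop back and re-read
		 * the same slot. This models direct_walk_iterator_set_pte()
		 * followed by the iterator's retry of the current pte.
		 */
		if (atomic_compare_exchange_strong(&toy_pte_table[i],
						   &old_pte, 0)) {
			zapped_any = true;
			i++;
		}
	}

	/* The caller uses the return value to decide whether to flush. */
	return zapped_any;
}

int main(void)
{
	for (unsigned long i = 0; i < TOY_PTES; i++)
		atomic_store(&toy_pte_table[i], (i & 1) ? PTE_PRESENT : 0);

	printf("zapped: %s\n", toy_zap_range(0, TOY_PTES) ? "yes" : "no");
	return 0;
}

In the patch itself, the analogous return value feeds
kvm_flush_remote_tlbs(), and the range/level check shown in the diff
additionally decides whether an entry may be zapped at all.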