The busy loop in hva_to_pfn_retry() is worse than a normal page fault
retry loop because it spins even while it's waiting for the invalidation
to complete. It isn't just that a page might get faulted out again
before it's actually accessed.

Introduce a wait queue to be woken when kvm->mn_active_invalidate_count
reaches zero, and wait on it if there is any pending invalidation which
affects the GPC being refreshed.

Signed-off-by: David Woodhouse <dwmw@xxxxxxxxxxxx>
[sean: massage comment as part of rebasing]
Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
---
 include/linux/kvm_host.h |  1 +
 virt/kvm/kvm_main.c      |  9 ++++++---
 virt/kvm/pfncache.c      | 30 ++++++++++++++++++++++++++----
 3 files changed, 33 insertions(+), 7 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 2c0ed735f0f4..a9d7b2200b6f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -787,6 +787,7 @@ struct kvm {
 	struct list_head gpc_list;
 	u64 mmu_gpc_invalidate_range_start;
 	u64 mmu_gpc_invalidate_range_end;
+	wait_queue_head_t gpc_invalidate_wq;
 
 	/*
 	 * created_vcpus is protected by kvm->lock, and is incremented
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b9223ecab2ca..3ba6d109a941 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -849,11 +849,13 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
 	spin_unlock(&kvm->mn_invalidate_lock);
 
 	/*
-	 * There can only be one waiter, since the wait happens under
-	 * slots_lock.
+	 * There can only be one memslots waiter, since the wait happens under
+	 * slots_lock, but there can be multiple gpc waiters.
 	 */
-	if (wake)
+	if (wake) {
+		wake_up(&kvm->gpc_invalidate_wq);
 		rcuwait_wake_up(&kvm->mn_memslots_update_rcuwait);
+	}
 }
 
 static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
@@ -1163,6 +1165,7 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
 
 	INIT_LIST_HEAD(&kvm->gpc_list);
 	spin_lock_init(&kvm->gpc_lock);
+	init_waitqueue_head(&kvm->gpc_invalidate_wq);
 	kvm->mmu_gpc_invalidate_range_start = KVM_HVA_ERR_BAD;
 	kvm->mmu_gpc_invalidate_range_end = KVM_HVA_ERR_BAD;
 
diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c
index 2163bb6b899c..77cc5633636a 100644
--- a/virt/kvm/pfncache.c
+++ b/virt/kvm/pfncache.c
@@ -180,6 +180,30 @@ static bool gpc_invalidate_retry_hva(struct gfn_to_pfn_cache *gpc,
 	return gpc->kvm->mmu_invalidate_seq != mmu_seq;
 }
 
+static void gpc_wait_for_invalidations(struct gfn_to_pfn_cache *gpc)
+{
+	struct kvm *kvm = gpc->kvm;
+
+	spin_lock(&kvm->mn_invalidate_lock);
+	if (gpc_invalidate_in_progress_hva(gpc)) {
+		DEFINE_WAIT(wait);
+
+		for (;;) {
+			prepare_to_wait(&kvm->gpc_invalidate_wq, &wait,
+					TASK_UNINTERRUPTIBLE);
+
+			if (!gpc_invalidate_in_progress_hva(gpc))
+				break;
+
+			spin_unlock(&kvm->mn_invalidate_lock);
+			schedule();
+			spin_lock(&kvm->mn_invalidate_lock);
+		}
+		finish_wait(&kvm->gpc_invalidate_wq, &wait);
+	}
+	spin_unlock(&kvm->mn_invalidate_lock);
+}
+
 static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
 {
 	/* Note, the new page offset may be different than the old! */
@@ -230,10 +254,8 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
 	 * trying to race ahead in the hope that a different task makes
 	 * the cache valid.
 	 */
-	while (gpc_invalidate_in_progress_hva(gpc)) {
-		if (!cond_resched())
-			cpu_relax();
-	}
+	while (gpc_invalidate_in_progress_hva(gpc))
+		gpc_wait_for_invalidations(gpc);
 
 	mmu_seq = gpc->kvm->mmu_invalidate_seq;
 	smp_rmb();
-- 
2.47.0.rc1.288.g06298d1525-goog
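For context, the sketch below is illustrative only and not part of the
patch; every demo_* name is invented. It shows, under those assumptions,
the plain wait-queue pattern that the new gpc_wait_for_invalidations()
and the wake_up() added to invalidate_range_end() rely on: the waiter
re-checks its condition after prepare_to_wait() and drops the spinlock
across schedule(), while the invalidation side wakes the queue only once
its active count reaches zero.

/*
 * Illustrative kernel-context sketch -- not from this patch.  The
 * demo_* names are made up; the pattern mirrors the waiter in
 * gpc_wait_for_invalidations() and the waker in
 * kvm_mmu_notifier_invalidate_range_end().
 */
#include <linux/wait.h>
#include <linux/spinlock.h>
#include <linux/sched.h>

static DEFINE_SPINLOCK(demo_lock);
static DECLARE_WAIT_QUEUE_HEAD(demo_wq);
static unsigned long demo_active_invalidations;

/* Invalidation start: bump the count under the lock. */
static void demo_invalidation_begin(void)
{
	spin_lock(&demo_lock);
	demo_active_invalidations++;
	spin_unlock(&demo_lock);
}

/* Invalidation end: wake the queue once the last one completes. */
static void demo_invalidation_end(void)
{
	bool wake;

	spin_lock(&demo_lock);
	wake = (--demo_active_invalidations == 0);
	spin_unlock(&demo_lock);

	if (wake)
		wake_up(&demo_wq);
}

/* Waiter: sleep until the count observed under the lock hits zero. */
static void demo_wait_for_zero(void)
{
	DEFINE_WAIT(wait);

	spin_lock(&demo_lock);
	while (demo_active_invalidations) {
		prepare_to_wait(&demo_wq, &wait, TASK_UNINTERRUPTIBLE);

		if (!demo_active_invalidations)
			break;

		/* Drop the lock so the invalidation side can finish. */
		spin_unlock(&demo_lock);
		schedule();
		spin_lock(&demo_lock);
	}
	finish_wait(&demo_wq, &wait);
	spin_unlock(&demo_lock);
}

Re-checking the condition after prepare_to_wait() is what closes the
race against a wake_up() that arrives between the initial test and the
call to schedule().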