From: David Woodhouse <dwmw@xxxxxxxxxxxx>

By performing the invalidation from the 'end' hook, the process is a lot
cleaner and simpler because it is guaranteed that ->needs_invalidation
will be cleared after hva_to_pfn() has been called, so the only thing
that hva_to_pfn_retry() needs to do is *wait* for there to be no pending
invalidations.

Another false positive on the range-based checks is thus removed as well.

Signed-off-by: David Woodhouse <dwmw@xxxxxxxxxxxx>
---
 virt/kvm/kvm_main.c | 20 +++++++----------
 virt/kvm/kvm_mm.h   | 12 +++++-----
 virt/kvm/pfncache.c | 55 ++++++++++++++++++++-------------------
 3 files changed, 38 insertions(+), 49 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index e04eb700448b..cca870f1a78c 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -795,18 +795,6 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 	}
 	spin_unlock(&kvm->mn_invalidate_lock);
 
-	/*
-	 * Invalidate pfn caches _before_ invalidating the secondary MMUs, i.e.
-	 * before acquiring mmu_lock, to avoid holding mmu_lock while acquiring
-	 * each cache's lock. There are relatively few caches in existence at
-	 * any given time, and the caches themselves can check for hva overlap,
-	 * i.e. don't need to rely on memslot overlap checks for performance.
-	 * Because this runs without holding mmu_lock, the pfn caches must use
-	 * mn_active_invalidate_count (see above) instead of
-	 * mmu_invalidate_in_progress.
-	 */
-	gfn_to_pfn_cache_invalidate_start(kvm, range->start, range->end);
-
 	/*
 	 * If one or more memslots were found and thus zapped, notify arch code
 	 * that guest memory has been reclaimed. This needs to be done *after*
@@ -860,6 +848,14 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
 
 	__kvm_handle_hva_range(kvm, &hva_range);
 
+	/*
+	 * It's safe to invalidate the gfn_to_pfn_caches from this 'end'
+	 * hook, because hva_to_pfn_retry() will wait until no active
+	 * invalidations could be affecting the corresponding uHVA
+	 * before allowing a newly mapped GPC to become valid.
+	 */
+	gfn_to_pfn_cache_invalidate(kvm, range->start, range->end);
+
 	/* Pairs with the increment in range_start(). */
 	spin_lock(&kvm->mn_invalidate_lock);
 	if (!WARN_ON_ONCE(!kvm->mn_active_invalidate_count))
diff --git a/virt/kvm/kvm_mm.h b/virt/kvm/kvm_mm.h
index 715f19669d01..34e4e67f09f8 100644
--- a/virt/kvm/kvm_mm.h
+++ b/virt/kvm/kvm_mm.h
@@ -24,13 +24,13 @@ kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool interruptible,
 		     bool *async, bool write_fault, bool *writable);
 
 #ifdef CONFIG_HAVE_KVM_PFNCACHE
-void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm,
-				       unsigned long start,
-				       unsigned long end);
+void gfn_to_pfn_cache_invalidate(struct kvm *kvm,
+				 unsigned long start,
+				 unsigned long end);
 #else
-static inline void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm,
-						     unsigned long start,
-						     unsigned long end)
+static inline void gfn_to_pfn_cache_invalidate(struct kvm *kvm,
+					       unsigned long start,
+					       unsigned long end)
 {
 }
 #endif /* HAVE_KVM_PFNCACHE */
diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c
index fa494eb3d924..3f48df8cd6e5 100644
--- a/virt/kvm/pfncache.c
+++ b/virt/kvm/pfncache.c
@@ -20,10 +20,15 @@
 #include "kvm_mm.h"
 
 /*
- * MMU notifier 'invalidate_range_start' hook.
+ * MMU notifier 'invalidate_range_end' hook. The hva_to_pfn_retry() function
+ * below may look up a PFN just before it is zapped, and may be mapping it
+ * concurrently with the actual invalidation (with the GPC lock dropped). By
+ * using a separate 'needs_invalidation' flag, the concurrent invalidation
+ * can handle this case, causing hva_to_pfn_retry() to drop its result and
+ * retry correctly.
  */
-void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start,
-				       unsigned long end)
+void gfn_to_pfn_cache_invalidate(struct kvm *kvm, unsigned long start,
+				 unsigned long end)
 {
 	struct gfn_to_pfn_cache *gpc;
 
@@ -140,15 +145,12 @@ static bool gpc_invalidations_pending(struct gfn_to_pfn_cache *gpc)
 		(gpc->kvm->mmu_gpc_invalidate_range_end > gpc->uhva);
 }
 
-static bool gpc_wait_for_invalidations(struct gfn_to_pfn_cache *gpc)
+static void gpc_wait_for_invalidations(struct gfn_to_pfn_cache *gpc)
 {
-	bool waited = false;
-
 	spin_lock(&gpc->kvm->mn_invalidate_lock);
 	if (gpc_invalidations_pending(gpc)) {
 		DEFINE_WAIT(wait);
 
-		waited = true;
 		for (;;) {
 			prepare_to_wait(&gpc->kvm->gpc_invalidate_wq, &wait,
 					TASK_UNINTERRUPTIBLE);
@@ -163,10 +165,8 @@ static bool gpc_wait_for_invalidations(struct gfn_to_pfn_cache *gpc)
 		finish_wait(&gpc->kvm->gpc_invalidate_wq, &wait);
 	}
 	spin_unlock(&gpc->kvm->mn_invalidate_lock);
-	return waited;
 }
 
-
 static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
 {
 	/* Note, the new page offset may be different than the old! */
@@ -199,28 +199,6 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
 
 		write_unlock_irq(&gpc->lock);
 
-		/*
-		 * Invalidation occurs from the invalidate_range_start() hook,
-		 * which could already have happened before __kvm_gpc_refresh()
-		 * (or the previous turn around this loop) took gpc->lock().
-		 * If so, and if the corresponding invalidate_range_end() hook
-		 * hasn't happened yet, hva_to_pfn() could return a mapping
-		 * which is about to be stale and which should not be used. So
-		 * check if there are any currently-running invalidations which
-		 * affect the uHVA of this GPC, and retry if there are. Any
-		 * invalidation which starts after gpc->needs_invalidation is
-		 * set is fine, because it will clear that flag and trigger a
-		 * retry. And any invalidation which *completes* by having its
-		 * invalidate_range_end() hook called immediately prior to this
-		 * check is also fine, because the page tables are guaranteed
-		 * to have been changted already, so hva_to_pfn() won't return
-		 * a stale mapping in that case anyway.
-		 */
-		if (gpc_wait_for_invalidations(gpc)) {
-			write_lock_irq(&gpc->lock);
-			continue;
-		}
-
 		/*
 		 * If the previous iteration "failed" due to an mmu_notifier
 		 * event, release the pfn and unmap the kernel virtual address
@@ -261,6 +239,14 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
 			goto out_error;
 		}
 
+		/*
+		 * Avoid populating a GPC with a user address which is already
+		 * being invalidated, if the invalidate_range_start() notifier
+		 * has already been called. The actual invalidation happens in
+		 * the invalidate_range_end() callback, so wait until there are
+		 * no active invalidations (for the relevant HVA).
+		 */
+		gpc_wait_for_invalidations(gpc);
 
 		write_lock_irq(&gpc->lock);
 
@@ -269,6 +255,13 @@
 		 * attempting to refresh.
 		 */
 		WARN_ON_ONCE(gpc->valid);
+
+		/*
+		 * Since gfn_to_pfn_cache_invalidate() is called from the
+		 * kvm_mmu_notifier_invalidate_range_end() callback, it can
+		 * invalidate the GPC the moment after hva_to_pfn() returned
+		 * a valid PFN. If that happens, retry.
+		 */
 	} while (!gpc->needs_invalidation);
 
 	gpc->valid = true;
-- 
2.44.0
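
For readers who want to see the synchronisation pattern in isolation, here is
a standalone userspace sketch in plain C with pthreads. None of these names
are KVM APIs; the single mutex stands in for mn_invalidate_lock and the
condition variable for the gpc_invalidate_wq waitqueue. The refresher looks
up a "PFN" with no lock held, then waits until no invalidation is in flight
before checking, under the lock, whether a concurrent 'end' hook cleared the
needs_invalidation flag and the lookup must be thrown away and retried.

/*
 * Standalone userspace sketch (not kernel code) of the wait-then-check
 * pattern described above.  All names are illustrative.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct cache {
	pthread_mutex_t lock;		 /* stands in for mn_invalidate_lock */
	pthread_cond_t no_invalidations; /* stands in for gpc_invalidate_wq */
	int active_invalidations;	 /* like mn_active_invalidate_count */
	bool needs_invalidation;
	bool valid;
	unsigned long pfn;
};

/* 'start' hook: an invalidation of the cached range is now in flight. */
static void begin_invalidate(struct cache *c)
{
	pthread_mutex_lock(&c->lock);
	c->active_invalidations++;
	pthread_mutex_unlock(&c->lock);
}

/* 'end' hook: the real invalidation; clear the flag so refresh() retries. */
static void end_invalidate(struct cache *c)
{
	pthread_mutex_lock(&c->lock);
	c->needs_invalidation = false;
	c->valid = false;
	if (--c->active_invalidations == 0)
		pthread_cond_broadcast(&c->no_invalidations);
	pthread_mutex_unlock(&c->lock);
}

static unsigned long lookup_pfn(void)
{
	return 42;			/* stand-in for hva_to_pfn() */
}

static void refresh(struct cache *c)
{
	pthread_mutex_lock(&c->lock);
	do {
		c->needs_invalidation = true;
		pthread_mutex_unlock(&c->lock);

		/* May race with an invalidation; detected below. */
		c->pfn = lookup_pfn();

		pthread_mutex_lock(&c->lock);
		/* Wait until no invalidation can still affect the lookup. */
		while (c->active_invalidations)
			pthread_cond_wait(&c->no_invalidations, &c->lock);

		/* If an 'end' hook ran meanwhile, the flag was cleared: retry. */
	} while (!c->needs_invalidation);
	c->valid = true;
	pthread_mutex_unlock(&c->lock);
}

static void *invalidator(void *arg)
{
	struct cache *c = arg;

	begin_invalidate(c);
	end_invalidate(c);
	return NULL;
}

int main(void)
{
	struct cache c = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.no_invalidations = PTHREAD_COND_INITIALIZER,
	};
	pthread_t t;

	pthread_create(&t, NULL, invalidator, &c);
	refresh(&c);
	pthread_join(t, NULL);
	printf("valid=%d pfn=%lu\n", c.valid, c.pfn);
	return 0;
}

The property mirrored from the patch is that the flag is only cleared from
the 'end' hook, and the refresher waits for all in-flight invalidations
before testing it, so a lookup which raced with an invalidation can never be
published as valid.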