On 5/20/22 16:49, Paolo Bonzini wrote:
On 4/27/22 03:40, Sean Christopherson wrote:
+ * Wait for mn_active_invalidate_count, not mmu_notifier_count,
+ * to go away, as the invalidation in the mmu_notifier event
+ * occurs _before_ mmu_notifier_count is elevated.
+ *
+ * Note, mn_active_invalidate_count can change at any time as
+ * it's not protected by gpc->lock. But, it is guaranteed to
+ * be elevated before the mmu_notifier acquires gpc->lock, and
+ * isn't dropped until after mmu_notifier_seq is updated. So,
+ * this task may get a false positive of sorts, i.e. see an
+ * elevated count and wait even though it's technically safe to
+ * proceed (because the mmu_notifier will invalidate the cache
+ * _after_ it's refreshed here), but the cache will never be
+ * refreshed with stale data, i.e. won't get false negatives.
I am all for lavish comments, but I think this is even too detailed.
What about:
And in fact this should be moved to a separate function.
diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c
index 50ce7b78b42f..321964ff42e1 100644
--- a/virt/kvm/pfncache.c
+++ b/virt/kvm/pfncache.c
@@ -112,6 +112,36 @@ static void gpc_release_pfn_and_khva(struct kvm *kvm, kvm_pfn_t pfn, void *khva)
}
}
+
+static inline bool mmu_notifier_retry_cache(struct kvm *kvm, unsigned long mmu_seq)
+{
+ /*
+ * mn_active_invalidate_count acts for all intents and purposes
+ * like mmu_notifier_count here; but we cannot use the latter
+ * because the invalidation in the mmu_notifier event occurs
+ * _before_ mmu_notifier_count is elevated.
+ *
+ * Note, it does not matter that mn_active_invalidate_count
+ * is not protected by gpc->lock. It is guaranteed to
+ * be elevated before the mmu_notifier acquires gpc->lock, and
+ * isn't dropped until after mmu_notifier_seq is updated.
+ */
+ if (kvm->mn_active_invalidate_count)
+ return true;
+
+ /*
+ * Ensure mn_active_invalidate_count is read before
+ * mmu_notifier_seq. This pairs with the smp_wmb() in
+ * mmu_notifier_invalidate_range_end() to guarantee either the
+ * old (non-zero) value of mn_active_invalidate_count or the
+ * new (incremented) value of mmu_notifier_seq is observed.
+ */
+ smp_rmb();
+ if (kvm->mmu_notifier_seq != mmu_seq)
+ return true;
+ return false;
+}
+
static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
{
/* Note, the new page offset may be different than the old! */
@@ -129,7 +159,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
*/
gpc->valid = false;
- for (;;) {
+ do {
mmu_seq = kvm->mmu_notifier_seq;
smp_rmb();
@@ -188,32 +218,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
* attempting to refresh.
*/
WARN_ON_ONCE(gpc->valid);
-
- /*
- * mn_active_invalidate_count acts for all intents and purposes
- * like mmu_notifier_count here; but we cannot use the latter
- * because the invalidation in the mmu_notifier event occurs
- * _before_ mmu_notifier_count is elevated.
- *
- * Note, it does not matter that mn_active_invalidate_count
- * is not protected by gpc->lock. It is guaranteed to
- * be elevated before the mmu_notifier acquires gpc->lock, and
- * isn't dropped until after mmu_notifier_seq is updated.
- */
- if (kvm->mn_active_invalidate_count)
- continue;
-
- /*
- * Ensure mn_active_invalidate_count is read before
- * mmu_notifier_seq. This pairs with the smp_wmb() in
- * mmu_notifier_invalidate_range_end() to guarantee either the
- * old (non-zero) value of mn_active_invalidate_count or the
- * new (incremented) value of mmu_notifier_seq is observed.
- */
- smp_rmb();
- if (kvm->mmu_notifier_seq == mmu_seq)
- break;
- }
+ } while (mmu_notifier_retry_cache(kvm, mmu_seq));
gpc->valid = true;
gpc->pfn = new_pfn;
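
For reference, here is a minimal sketch of the writer-side ordering that the smp_rmb() in mmu_notifier_retry_cache() pairs with. It is illustrative only, not the actual MMU notifier code in virt/kvm/kvm_main.c; the function names are made up, and only the ordering of the accesses matters: the count is elevated before the zap, and mmu_notifier_seq is bumped and published before the count is dropped.

static void example_invalidate_range_start(struct kvm *kvm)
{
	/* Elevated before any zapping and before gpc->lock is acquired. */
	kvm->mn_active_invalidate_count++;

	/* ... unmap/zap the affected range ... */
}

static void example_invalidate_range_end(struct kvm *kvm)
{
	/* Publish the new sequence count first... */
	kvm->mmu_notifier_seq++;

	/* ...then pair with the reader's smp_rmb() before dropping the count. */
	smp_wmb();
	kvm->mn_active_invalidate_count--;
}

With that ordering, a reader that does "read count; smp_rmb(); read seq" observes either the non-zero count or the new seq, so a refresh racing with an in-flight invalidation always retries.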