On 2023/8/10 17:02, Yan Zhao wrote:
> Register the .numa_protect() callback in the mmu notifier so that KVM can
> get accurate information about when a page is PROT_NONE protected in the
> primary MMU and unmap it in the secondary MMU accordingly.
>
> In KVM's .invalidate_range_start() handler, if the event is to notify that
> the range may be protected to PROT_NONE for NUMA migration purposes,
> don't do the unmapping in the secondary MMU. Hold on until .numa_protect()
> comes.
>
> Signed-off-by: Yan Zhao <yan.y.zhao@xxxxxxxxx>
> ---
>  virt/kvm/kvm_main.c | 25 ++++++++++++++++++++++---
>  1 file changed, 22 insertions(+), 3 deletions(-)
>
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index dfbaafbe3a00..907444a1761b 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -711,6 +711,20 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
>  	kvm_handle_hva_range(mn, address, address + 1, pte, kvm_change_spte_gfn);
>  }
>
> +static void kvm_mmu_notifier_numa_protect(struct mmu_notifier *mn,
> +					  struct mm_struct *mm,
> +					  unsigned long start,
> +					  unsigned long end)
> +{
> +	struct kvm *kvm = mmu_notifier_to_kvm(mn);
> +
> +	WARN_ON_ONCE(!READ_ONCE(kvm->mn_active_invalidate_count));
> +	if (!READ_ONCE(kvm->mmu_invalidate_in_progress))
> +		return;
> +
> +	kvm_handle_hva_range(mn, start, end, __pte(0), kvm_unmap_gfn_range);
> +}

NUMA balancing scans a wide memory range, and with the range-based
notification there is a single IPI via kvm_flush_remote_tlbs() for the
whole range. With page-level notification as above, it may generate a
large number of remote TLB flush IPIs. Also, for a NUMA-balancing
notification, the VM's PMD page tables probably do not need to be freed
in kvm_unmap_gfn_range. (A rough sketch of batching the flush follows at
the end of this mail.)

Regards
Bibo Mao

> +
>  void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start,
>  			      unsigned long end)
>  {
> @@ -744,14 +758,18 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
>  				 const struct mmu_notifier_range *range)
>  {
>  	struct kvm *kvm = mmu_notifier_to_kvm(mn);
> +	bool is_numa = (range->event == MMU_NOTIFY_PROTECTION_VMA) &&
> +		       (range->flags & MMU_NOTIFIER_RANGE_NUMA);
>  	const struct kvm_hva_range hva_range = {
>  		.start		= range->start,
>  		.end		= range->end,
>  		.pte		= __pte(0),
> -		.handler	= kvm_unmap_gfn_range,
> +		.handler	= !is_numa ? kvm_unmap_gfn_range :
> +				  (void *)kvm_null_fn,
>  		.on_lock	= kvm_mmu_invalidate_begin,
> -		.on_unlock	= kvm_arch_guest_memory_reclaimed,
> -		.flush_on_ret	= true,
> +		.on_unlock	= !is_numa ? kvm_arch_guest_memory_reclaimed :
> +				  (void *)kvm_null_fn,
> +		.flush_on_ret	= !is_numa ? true : false,
>  		.may_block	= mmu_notifier_range_blockable(range),
>  	};
>
> @@ -899,6 +917,7 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
>  	.clear_young		= kvm_mmu_notifier_clear_young,
>  	.test_young		= kvm_mmu_notifier_test_young,
>  	.change_pte		= kvm_mmu_notifier_change_pte,
> +	.numa_protect		= kvm_mmu_notifier_numa_protect,
>  	.release		= kvm_mmu_notifier_release,
>  };
>
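
To make the flush-batching concern above concrete, here is a minimal,
untested sketch, not part of Yan's posted patch. It reuses the existing
kvm_handle_hva_range_no_flush() helper in virt/kvm/kvm_main.c; the
numa_flush_pending field and the kvm_numa_protect_flush() hook are
hypothetical names invented for illustration, and the point at which the
deferred flush would run (once per invalidation window) is an assumption:

/*
 * Illustrative sketch only: unmap on each .numa_protect() notification
 * but defer the remote TLB flush, so that a wide NUMA-balancing scan
 * costs one IPI rather than one IPI per notified page.
 *
 * "numa_flush_pending" is a hypothetical new field in struct kvm.
 */
static void kvm_mmu_notifier_numa_protect(struct mmu_notifier *mn,
					  struct mm_struct *mm,
					  unsigned long start,
					  unsigned long end)
{
	struct kvm *kvm = mmu_notifier_to_kvm(mn);

	WARN_ON_ONCE(!READ_ONCE(kvm->mn_active_invalidate_count));
	if (!READ_ONCE(kvm->mmu_invalidate_in_progress))
		return;

	/* Existing helper: runs the handler without flushing on return. */
	if (kvm_handle_hva_range_no_flush(mn, start, end, kvm_unmap_gfn_range))
		WRITE_ONCE(kvm->numa_flush_pending, true);
}

/*
 * Hypothetical hook, e.g. called once from .invalidate_range_end() after
 * the whole scanned range has been notified: issue one batched flush.
 */
static void kvm_numa_protect_flush(struct kvm *kvm)
{
	if (READ_ONCE(kvm->numa_flush_pending)) {
		WRITE_ONCE(kvm->numa_flush_pending, false);
		kvm_flush_remote_tlbs(kvm);
	}
}

Whether the PMD page tables can also be preserved on this path is an
arch-level question (e.g. having the unmap zap only leaf SPTEs for these
ranges), which the sketch above does not attempt to address.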