Add kvm_arch_prepare_bitmap_age() for architectures to indiciate that they support bitmap-based aging in kvm_mmu_notifier_test_clear_young() and that they do not need KVM to grab the MMU lock for writing. This function allows architectures to do other locking or other preparatory work that it needs. If an architecture does not implement kvm_arch_prepare_bitmap_age() or is unable to do bitmap-based aging at runtime (and marks the bitmap as unreliable): 1. If a bitmap was provided, we inform the caller that the bitmap is unreliable (MMU_NOTIFIER_YOUNG_BITMAP_UNRELIABLE). 2. If a bitmap was not provided, fall back to the old logic. Also add logic for architectures to easily use the provided bitmap if they are able. The expectation is that the architecture's implementation of kvm_gfn_test_age() will use kvm_gfn_record_young(), and kvm_gfn_age() will use kvm_gfn_should_age(). Suggested-by: Yu Zhao <yuzhao@xxxxxxxxxx> Signed-off-by: James Houghton <jthoughton@xxxxxxxxxx> --- include/linux/kvm_host.h | 60 ++++++++++++++++++++++++++ virt/kvm/kvm_main.c | 92 +++++++++++++++++++++++++++++----------- 2 files changed, 127 insertions(+), 25 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1800d03a06a9..5862fd7b5f9b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1992,6 +1992,26 @@ extern const struct _kvm_stats_desc kvm_vm_stats_desc[]; extern const struct kvm_stats_header kvm_vcpu_stats_header; extern const struct _kvm_stats_desc kvm_vcpu_stats_desc[]; +/* + * Architectures that support using bitmaps for kvm_age_gfn() and + * kvm_test_age_gfn should return true for kvm_arch_prepare_bitmap_age() + * and do any work they need to prepare. The subsequent walk will not + * automatically grab the KVM MMU lock, so some architectures may opt + * to grab it. + * + * If true is returned, a subsequent call to kvm_arch_finish_bitmap_age() is + * guaranteed. + */ +#ifndef kvm_arch_prepare_bitmap_age +static inline bool kvm_arch_prepare_bitmap_age(struct mmu_notifier *mn) +{ + return false; +} +#endif +#ifndef kvm_arch_finish_bitmap_age +static inline void kvm_arch_finish_bitmap_age(struct mmu_notifier *mn) {} +#endif + #ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn) { @@ -2076,9 +2096,16 @@ static inline bool mmu_invalidate_retry_gfn_unsafe(struct kvm *kvm, return READ_ONCE(kvm->mmu_invalidate_seq) != mmu_seq; } +struct test_clear_young_metadata { + unsigned long *bitmap; + unsigned long bitmap_offset_end; + unsigned long end; + bool unreliable; +}; union kvm_mmu_notifier_arg { pte_t pte; unsigned long attributes; + struct test_clear_young_metadata *metadata; }; struct kvm_gfn_range { @@ -2087,11 +2114,44 @@ struct kvm_gfn_range { gfn_t end; union kvm_mmu_notifier_arg arg; bool may_block; + bool lockless; }; bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range); bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range); bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range); bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range); + +static inline void kvm_age_set_unreliable(struct kvm_gfn_range *range) +{ + struct test_clear_young_metadata *args = range->arg.metadata; + + args->unreliable = true; +} +static inline unsigned long kvm_young_bitmap_offset(struct kvm_gfn_range *range, + gfn_t gfn) +{ + struct test_clear_young_metadata *args = range->arg.metadata; + + return hva_to_gfn_memslot(args->end - 1, range->slot) - gfn; +} +static inline void kvm_gfn_record_young(struct kvm_gfn_range *range, gfn_t gfn) +{ + struct test_clear_young_metadata *args = range->arg.metadata; + + WARN_ON_ONCE(gfn < range->start || gfn >= range->end); + if (args->bitmap) + __set_bit(kvm_young_bitmap_offset(range, gfn), args->bitmap); +} +static inline bool kvm_gfn_should_age(struct kvm_gfn_range *range, gfn_t gfn) +{ + struct test_clear_young_metadata *args = range->arg.metadata; + + WARN_ON_ONCE(gfn < range->start || gfn >= range->end); + if (args->bitmap) + return test_bit(kvm_young_bitmap_offset(range, gfn), + args->bitmap); + return true; +} #endif #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d0545d88c802..7d80321e2ece 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -550,6 +550,7 @@ struct kvm_mmu_notifier_range { on_lock_fn_t on_lock; bool flush_on_ret; bool may_block; + bool lockless; }; /* @@ -598,6 +599,8 @@ static __always_inline kvm_mn_ret_t __kvm_handle_hva_range(struct kvm *kvm, struct kvm_memslots *slots; int i, idx; + BUILD_BUG_ON(sizeof(gfn_range.arg) != sizeof(gfn_range.arg.pte)); + if (WARN_ON_ONCE(range->end <= range->start)) return r; @@ -637,15 +640,18 @@ static __always_inline kvm_mn_ret_t __kvm_handle_hva_range(struct kvm *kvm, gfn_range.start = hva_to_gfn_memslot(hva_start, slot); gfn_range.end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, slot); gfn_range.slot = slot; + gfn_range.lockless = range->lockless; if (!r.found_memslot) { r.found_memslot = true; - KVM_MMU_LOCK(kvm); - if (!IS_KVM_NULL_FN(range->on_lock)) - range->on_lock(kvm); - - if (IS_KVM_NULL_FN(range->handler)) - break; + if (!range->lockless) { + KVM_MMU_LOCK(kvm); + if (!IS_KVM_NULL_FN(range->on_lock)) + range->on_lock(kvm); + + if (IS_KVM_NULL_FN(range->handler)) + break; + } } r.ret |= range->handler(kvm, &gfn_range); } @@ -654,7 +660,7 @@ static __always_inline kvm_mn_ret_t __kvm_handle_hva_range(struct kvm *kvm, if (range->flush_on_ret && r.ret) kvm_flush_remote_tlbs(kvm); - if (r.found_memslot) + if (r.found_memslot && !range->lockless) KVM_MMU_UNLOCK(kvm); srcu_read_unlock(&kvm->srcu, idx); @@ -682,19 +688,24 @@ static __always_inline int kvm_handle_hva_range(struct mmu_notifier *mn, return __kvm_handle_hva_range(kvm, &range).ret; } -static __always_inline int kvm_handle_hva_range_no_flush(struct mmu_notifier *mn, - unsigned long start, - unsigned long end, - gfn_handler_t handler) +static __always_inline int kvm_handle_hva_range_no_flush( + struct mmu_notifier *mn, + unsigned long start, + unsigned long end, + gfn_handler_t handler, + union kvm_mmu_notifier_arg arg, + bool lockless) { struct kvm *kvm = mmu_notifier_to_kvm(mn); const struct kvm_mmu_notifier_range range = { .start = start, .end = end, .handler = handler, + .arg = arg, .on_lock = (void *)kvm_null_fn, .flush_on_ret = false, .may_block = false, + .lockless = lockless, }; return __kvm_handle_hva_range(kvm, &range).ret; @@ -909,15 +920,36 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, kvm_age_gfn); } -static int kvm_mmu_notifier_clear_young(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long start, - unsigned long end, - unsigned long *bitmap) +static int kvm_mmu_notifier_test_clear_young(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end, + unsigned long *bitmap, + bool clear) { - trace_kvm_age_hva(start, end); + if (kvm_arch_prepare_bitmap_age(mn)) { + struct test_clear_young_metadata args = { + .bitmap = bitmap, + .end = end, + .unreliable = false, + }; + union kvm_mmu_notifier_arg arg = { + .metadata = &args + }; + bool young; + + young = kvm_handle_hva_range_no_flush( + mn, start, end, + clear ? kvm_age_gfn : kvm_test_age_gfn, + arg, true); + + kvm_arch_finish_bitmap_age(mn); - /* We don't support bitmaps. Don't test or clear anything. */ + if (!args.unreliable) + return young ? MMU_NOTIFIER_YOUNG_FAST : 0; + } + + /* A bitmap was passed but the architecture doesn't support bitmaps */ if (bitmap) return MMU_NOTIFIER_YOUNG_BITMAP_UNRELIABLE; @@ -934,7 +966,21 @@ static int kvm_mmu_notifier_clear_young(struct mmu_notifier *mn, * cadence. If we find this inaccurate, we might come up with a * more sophisticated heuristic later. */ - return kvm_handle_hva_range_no_flush(mn, start, end, kvm_age_gfn); + return kvm_handle_hva_range_no_flush( + mn, start, end, clear ? kvm_age_gfn : kvm_test_age_gfn, + KVM_MMU_NOTIFIER_NO_ARG, false); +} + +static int kvm_mmu_notifier_clear_young(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end, + unsigned long *bitmap) +{ + trace_kvm_age_hva(start, end); + + return kvm_mmu_notifier_test_clear_young(mn, mm, start, end, bitmap, + true); } static int kvm_mmu_notifier_test_young(struct mmu_notifier *mn, @@ -945,12 +991,8 @@ static int kvm_mmu_notifier_test_young(struct mmu_notifier *mn, { trace_kvm_test_age_hva(start, end); - /* We don't support bitmaps. Don't test or clear anything. */ - if (bitmap) - return MMU_NOTIFIER_YOUNG_BITMAP_UNRELIABLE; - - return kvm_handle_hva_range_no_flush(mn, start, end, - kvm_test_age_gfn); + return kvm_mmu_notifier_test_clear_young(mn, mm, start, end, bitmap, + false); } static void kvm_mmu_notifier_release(struct mmu_notifier *mn, -- 2.44.0.478.gd926399ef9-goog