When using initially-all-set, huge pages are not write-protected when
dirty logging is enabled on the memslot. Instead, they are
write-protected the first time userspace invokes CLEAR_DIRTY_LOG, and
only for the specific sub-region being cleared.

Enhance CLEAR_DIRTY_LOG to also try to split huge pages prior to
write-protecting, to avoid causing write-protection faults on vCPU
threads. This also allows userspace to smear the cost of huge page
splitting across multiple ioctls, rather than splitting the entire
memslot in one go as happens when initially-all-set is not used.

Signed-off-by: David Matlack <dmatlack@xxxxxxxxxx>
---
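Note (illustrative, not part of the patch): a minimal userspace sketch
of how the splitting cost can be smeared across multiple ioctls by
clearing the dirty log in chunks, assuming the VM has
KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 enabled with
KVM_DIRTY_LOG_INITIALLY_SET and the full-slot dirty bitmap was already
fetched with KVM_GET_DIRTY_LOG. The memslot size, chunk size, and
helper name below are hypothetical.

#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

#define SLOT_NPAGES	(1UL << 20)	/* hypothetical 4GiB memslot */
#define CHUNK_NPAGES	(1UL << 16)	/* hypothetical 256MiB per ioctl */

/* Clear the dirty log for one memslot, one chunk at a time. */
static void clear_dirty_log_in_chunks(int vm_fd, __u32 slot, void *bitmap)
{
	struct kvm_clear_dirty_log clr;
	__u64 first;

	for (first = 0; first < SLOT_NPAGES; first += CHUNK_NPAGES) {
		memset(&clr, 0, sizeof(clr));
		clr.slot = slot;
		clr.first_page = first;
		clr.num_pages = CHUNK_NPAGES;
		/* Bit 0 of the passed bitmap corresponds to first_page. */
		clr.dirty_bitmap = (char *)bitmap + first / 8;

		/*
		 * With this patch, each call splits and write-protects
		 * only the huge pages backing this chunk.
		 */
		ioctl(vm_fd, KVM_CLEAR_DIRTY_LOG, &clr);
	}
}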
 arch/x86/include/asm/kvm_host.h |  4 ++++
 arch/x86/kvm/mmu/mmu.c          | 36 +++++++++++++++++++++++++++------
 arch/x86/kvm/x86.c              |  2 +-
 arch/x86/kvm/x86.h              |  2 ++
 4 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4a507109e886..3e537e261562 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1576,6 +1576,10 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
 void kvm_mmu_slot_try_split_huge_pages(struct kvm *kvm,
 				       const struct kvm_memory_slot *memslot,
 				       int target_level);
+void kvm_mmu_try_split_huge_pages(struct kvm *kvm,
+				  const struct kvm_memory_slot *memslot,
+				  u64 start, u64 end,
+				  int target_level);
 void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
 				   const struct kvm_memory_slot *memslot);
 void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index c9e5fe290714..55640d73df5a 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1362,6 +1362,20 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
 		gfn_t start = slot->base_gfn + gfn_offset + __ffs(mask);
 		gfn_t end = slot->base_gfn + gfn_offset + __fls(mask);
 
+		/*
+		 * Try to proactively split any huge pages down to 4KB so that
+		 * vCPUs don't have to take write-protection faults.
+		 *
+		 * Drop the MMU lock since huge page splitting uses its own
+		 * locking scheme and does not require the write lock in all
+		 * cases.
+		 */
+		if (READ_ONCE(eagerly_split_huge_pages_for_dirty_logging)) {
+			write_unlock(&kvm->mmu_lock);
+			kvm_mmu_try_split_huge_pages(kvm, slot, start, end, PG_LEVEL_4K);
+			write_lock(&kvm->mmu_lock);
+		}
+
 		kvm_mmu_slot_gfn_write_protect(kvm, slot, start, PG_LEVEL_2M);
 
 		/* Cross two large pages? */
@@ -5811,13 +5825,11 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
 		kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
 }
 
-void kvm_mmu_slot_try_split_huge_pages(struct kvm *kvm,
-				       const struct kvm_memory_slot *memslot,
-				       int target_level)
+void kvm_mmu_try_split_huge_pages(struct kvm *kvm,
+				  const struct kvm_memory_slot *memslot,
+				  u64 start, u64 end,
+				  int target_level)
 {
-	u64 start = memslot->base_gfn;
-	u64 end = start + memslot->npages;
-
 	if (is_tdp_mmu_enabled(kvm)) {
 		read_lock(&kvm->mmu_lock);
 		kvm_tdp_mmu_try_split_huge_pages(kvm, memslot, start, end, target_level);
@@ -5825,6 +5837,18 @@ void kvm_mmu_slot_try_split_huge_pages(struct kvm *kvm,
 	}
 }
 
+void kvm_mmu_slot_try_split_huge_pages(struct kvm *kvm,
+				       const struct kvm_memory_slot *memslot,
+				       int target_level)
+{
+	u64 start, end;
+
+	start = memslot->base_gfn;
+	end = start + memslot->npages;
+
+	kvm_mmu_try_split_huge_pages(kvm, memslot, start, end, target_level);
+}
+
 static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
 					 struct kvm_rmap_head *rmap_head,
 					 const struct kvm_memory_slot *slot)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fb5592bf2eee..e27a3d6e3978 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -187,7 +187,7 @@ module_param(force_emulation_prefix, bool, S_IRUGO);
 int __read_mostly pi_inject_timer = -1;
 module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR);
 
-static bool __read_mostly eagerly_split_huge_pages_for_dirty_logging = true;
+bool __read_mostly eagerly_split_huge_pages_for_dirty_logging = true;
 module_param(eagerly_split_huge_pages_for_dirty_logging, bool, 0644);
 
 /*
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 4abcd8d9836d..825e47451875 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -352,6 +352,8 @@ extern int pi_inject_timer;
 
 extern bool report_ignored_msrs;
 
+extern bool eagerly_split_huge_pages_for_dirty_logging;
+
 static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
 {
 	return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult,
-- 
2.34.1.173.g76aa8bc2d0-goog