Instead of acquiring the MMU lock in the arch-generic code, force each
implementation of kvm_arch_mmu_enable_log_dirty_pt_masked to acquire the
MMU lock as needed. This is in preparation for performing eager page
splitting in the x86 implementation of
kvm_arch_mmu_enable_log_dirty_pt_masked, which involves dropping the MMU
lock in write mode and re-acquiring it in read mode (and possibly
rescheduling) during splitting. Pushing the MMU lock down into the arch
code makes the x86 synchronization much easier to reason about, and does
not harm the readability of the other architectures.

This should be a safe change because:

 * No architecture requires a TLB flush before dropping the MMU lock.
 * The dirty bitmap does not need to be synchronized with changes to the
   page tables by the MMU lock, as evidenced by the fact that x86
   modifies the dirty bitmap without acquiring the MMU lock in
   fast_page_fault.

This change does increase the number of times the MMU lock is acquired
and released during KVM_CLEAR_DIRTY_LOG, but this is not a
performance-critical path and breaking up the lock duration may reduce
contention on vCPU threads.

Signed-off-by: David Matlack <dmatlack@xxxxxxxxxx>
---
 arch/arm64/kvm/mmu.c   | 2 ++
 arch/mips/kvm/mmu.c    | 5 +++--
 arch/riscv/kvm/mmu.c   | 2 ++
 arch/x86/kvm/mmu/mmu.c | 4 ++++
 virt/kvm/dirty_ring.c  | 2 --
 virt/kvm/kvm_main.c    | 4 ----
 6 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index e65acf35cee3..48085cb534d5 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -749,7 +749,9 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
 		struct kvm_memory_slot *slot,
 		gfn_t gfn_offset, unsigned long mask)
 {
+	spin_lock(&kvm->mmu_lock);
 	kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
+	spin_unlock(&kvm->mmu_lock);
 }
 
 static void kvm_send_hwpoison_signal(unsigned long address, short lsb)
diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c
index 1bfd1b501d82..7e67edcd5aae 100644
--- a/arch/mips/kvm/mmu.c
+++ b/arch/mips/kvm/mmu.c
@@ -409,8 +409,7 @@ int kvm_mips_mkclean_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn)
  * @mask:	The mask of dirty pages at offset 'gfn_offset' in this memory
  *		slot to be write protected
  *
- * Walks bits set in mask write protects the associated pte's. Caller must
- * acquire @kvm->mmu_lock.
+ * Walks bits set in mask write protects the associated pte's.
  */
 void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
 		struct kvm_memory_slot *slot,
@@ -420,7 +419,9 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
 	gfn_t start = base_gfn + __ffs(mask);
 	gfn_t end = base_gfn + __fls(mask);
 
+	spin_lock(&kvm->mmu_lock);
 	kvm_mips_mkclean_gpa_pt(kvm, start, end);
+	spin_unlock(&kvm->mmu_lock);
 }
 
 /*
diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
index 7d884b15cf5e..d084ac939b0f 100644
--- a/arch/riscv/kvm/mmu.c
+++ b/arch/riscv/kvm/mmu.c
@@ -424,7 +424,9 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
 	phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT;
 	phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;
 
+	spin_lock(&kvm->mmu_lock);
 	stage2_wp_range(kvm, start, end);
+	spin_unlock(&kvm->mmu_lock);
 }
 
 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 9116c6a4ced1..c9e5fe290714 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1347,6 +1347,8 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
 				struct kvm_memory_slot *slot,
 				gfn_t gfn_offset, unsigned long mask)
 {
+	write_lock(&kvm->mmu_lock);
+
 	/*
 	 * Huge pages are NOT write protected when we start dirty logging in
 	 * initially-all-set mode; must write protect them here so that they
@@ -1374,6 +1376,8 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
 		kvm_mmu_clear_dirty_pt_masked(kvm, slot, gfn_offset, mask);
 	else
 		kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
+
+	write_unlock(&kvm->mmu_lock);
 }
 
 int kvm_cpu_dirty_log_size(void)
diff --git a/virt/kvm/dirty_ring.c b/virt/kvm/dirty_ring.c
index 88f4683198ea..6b26ec60c96a 100644
--- a/virt/kvm/dirty_ring.c
+++ b/virt/kvm/dirty_ring.c
@@ -61,9 +61,7 @@ static void kvm_reset_dirty_gfn(struct kvm *kvm, u32 slot, u64 offset, u64 mask)
 	if (!memslot || (offset + __fls(mask)) >= memslot->npages)
 		return;
 
-	KVM_MMU_LOCK(kvm);
 	kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, offset, mask);
-	KVM_MMU_UNLOCK(kvm);
 }
 
 int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 3595eddd476a..da4850fb2982 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2048,7 +2048,6 @@ static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log)
 		dirty_bitmap_buffer = kvm_second_dirty_bitmap(memslot);
 		memset(dirty_bitmap_buffer, 0, n);
 
-		KVM_MMU_LOCK(kvm);
 		for (i = 0; i < n / sizeof(long); i++) {
 			unsigned long mask;
 			gfn_t offset;
@@ -2064,7 +2063,6 @@ static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log)
 			kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot,
 								offset, mask);
 		}
-		KVM_MMU_UNLOCK(kvm);
 	}
 
 	if (flush)
@@ -2159,7 +2157,6 @@ static int kvm_clear_dirty_log_protect(struct kvm *kvm,
 	if (copy_from_user(dirty_bitmap_buffer, log->dirty_bitmap, n))
 		return -EFAULT;
 
-	KVM_MMU_LOCK(kvm);
 	for (offset = log->first_page, i = offset / BITS_PER_LONG,
 	     n = DIV_ROUND_UP(log->num_pages, BITS_PER_LONG);
 	     n--; i++, offset += BITS_PER_LONG) {
@@ -2182,7 +2179,6 @@ static int kvm_clear_dirty_log_protect(struct kvm *kvm,
 							offset, mask);
 		}
 	}
-	KVM_MMU_UNLOCK(kvm);
 
 	if (flush)
 		kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
-- 
2.34.1.173.g76aa8bc2d0-goog