Add a mechanism to switch between WrProt-based and D-Bit-based dirty
logging while the VM is running (possibly while dirty logging is
already enabled). Switching to/from PML is not currently supported, but
that can be added later.

Signed-off-by: Junaid Shahid <junaids@xxxxxxxxxx>
---
 arch/x86/include/asm/kvm_host.h |   4 +
 arch/x86/include/uapi/asm/kvm.h |   1 +
 arch/x86/kvm/mmu.c              | 146 +++++++++++++++++++++++++++++++-
 arch/x86/kvm/mmu.h              |   1 +
 arch/x86/kvm/vmx.c              |  10 ++-
 5 files changed, 158 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3da22c92a5d6..bdbc87a26662 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1131,6 +1131,8 @@ struct kvm_x86_ops {
 	 * function can also add any un-flushed dirty state maintained by
 	 * the hardware to the mask (e.g. if flush_log_dirty is not
 	 * implemented.)
+	 * - switch_dirty_log_mode:
+	 *	Switch to the given dirty log mode.
 	 */
 	void (*slot_enable_log_dirty)(struct kvm *kvm,
 				      struct kvm_memory_slot *slot);
@@ -1141,6 +1143,7 @@ struct kvm_x86_ops {
 					   struct kvm_memory_slot *slot,
 					   gfn_t offset, unsigned long *mask);
 	int (*write_log_dirty)(struct kvm_vcpu *vcpu);
+	int (*switch_dirty_log_mode)(struct kvm *kvm, u8 mode);
 
 	/* pmu operations of sub-arch */
 	const struct kvm_pmu_ops *pmu_ops;
@@ -1202,6 +1205,7 @@ struct kvm_arch_async_pf {
 
 extern struct kvm_x86_ops *kvm_x86_ops;
 extern u8 kvm_default_dirty_log_mode;
+extern u8 kvm_supported_dirty_log_modes;
 
 #define __KVM_HAVE_ARCH_VM_ALLOC
 static inline struct kvm *kvm_arch_alloc_vm(void)
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 2b1c442bffe6..ff2ed65be75c 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -420,6 +420,7 @@ struct kvm_nested_state {
 	__u8 data[0];
 };
 
+#define KVM_DIRTY_LOG_MODE_DEFAULT	0
 #define KVM_DIRTY_LOG_MODE_WRPROT	1
 #define KVM_DIRTY_LOG_MODE_DBIT	2
 #define KVM_DIRTY_LOG_MODE_PML		4
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 0839b8cfdf66..4abc75c97593 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -265,6 +265,9 @@ static u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask;
 u8 __read_mostly kvm_default_dirty_log_mode;
 EXPORT_SYMBOL_GPL(kvm_default_dirty_log_mode);
 
+u8 __read_mostly kvm_supported_dirty_log_modes;
+EXPORT_SYMBOL_GPL(kvm_supported_dirty_log_modes);
+
 static void mmu_spte_set(u64 *sptep, u64 spte);
 static union kvm_mmu_page_role
 kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu);
@@ -436,6 +439,7 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
 
 	if (shadow_dirty_mask == 0) {
 		enable_d_bit_logging = false;
+		kvm_supported_dirty_log_modes &= ~KVM_DIRTY_LOG_MODE_DBIT;
 
 		if (kvm_default_dirty_log_mode == KVM_DIRTY_LOG_MODE_DBIT)
 			kvm_default_dirty_log_mode = KVM_DIRTY_LOG_MODE_WRPROT;
@@ -1704,6 +1708,30 @@ kvm_mmu_shadow_dirty_mask_test_and_clear(struct kvm *kvm,
 	return mask;
 }
 
+/*
+ * Test the D bit in the SPTE(s) corresponding to the GFN and return true
+ * if any SPTE has the D bit set.
+ *
+ * The MMU lock should be held before calling this function.
+ */
+bool kvm_mmu_test_shadow_dirty_mask(struct kvm *kvm,
+				    struct kvm_memory_slot *slot,
+				    gfn_t gfn_offset)
+{
+	struct kvm_rmap_head *rmap_head;
+	u64 *sptep;
+	struct rmap_iterator iter;
+	u64 pte_bits = 0;
+
+	rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset,
+				  PT_PAGE_TABLE_LEVEL, slot);
+
+	for_each_rmap_spte(rmap_head, &iter, sptep)
+		pte_bits |= *sptep;
+
+	return pte_bits & shadow_dirty_mask;
+}
+
 /**
  * Gets the dirty state (if any) for selected PT level pages from the hardware
  * MMU structures and resets the hardware state to track those pages again.
@@ -6081,9 +6109,13 @@ int kvm_mmu_module_init(void)
 	BUILD_BUG_ON(sizeof(union kvm_mmu_role) != sizeof(u64));
 
 	kvm_mmu_reset_all_pte_masks();
-	kvm_default_dirty_log_mode = enable_d_bit_logging
-					? KVM_DIRTY_LOG_MODE_DBIT
-					: KVM_DIRTY_LOG_MODE_WRPROT;
+	kvm_default_dirty_log_mode = KVM_DIRTY_LOG_MODE_WRPROT;
+	kvm_supported_dirty_log_modes = KVM_DIRTY_LOG_MODE_WRPROT;
+
+	if (enable_d_bit_logging) {
+		kvm_supported_dirty_log_modes |= KVM_DIRTY_LOG_MODE_DBIT;
+		kvm_default_dirty_log_mode = KVM_DIRTY_LOG_MODE_DBIT;
+	}
 
 	pte_list_desc_cache = kmem_cache_create("pte_list_desc",
 					    sizeof(struct pte_list_desc),
@@ -6150,3 +6182,111 @@ void kvm_mmu_module_exit(void)
 	unregister_shrinker(&mmu_shrinker);
 	mmu_audit_disable();
 }
+
+static void switch_dirty_log_mode_dbit_to_wrprot(struct kvm *kvm)
+{
+	ulong i;
+	struct kvm_memslots *slots = kvm_memslots(kvm);
+	struct kvm_memory_slot *memslot;
+	bool flush = false;
+
+	kvm_for_each_memslot(memslot, slots)
+		if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES)
+			flush |= kvm_mmu_slot_leaf_remove_write_access(kvm,
+								       memslot);
+	/*
+	 * We need to ensure that the write-protection gets propagated to all
+	 * CPUs before we transfer the dirty bits to the dirty bitmap.
+	 * Otherwise, it would be possible for some other CPU to write to a
+	 * page some time after we have gone over that page in the loop below
+	 * and then the page wouldn't get marked in the dirty bitmap.
+	 */
+	if (flush)
+		kvm_flush_remote_tlbs(kvm);
+
+	spin_lock(&kvm->mmu_lock);
+
+	kvm_for_each_memslot(memslot, slots) {
+		if (!(memslot->flags & KVM_MEM_LOG_DIRTY_PAGES))
+			continue;
+
+		for (i = 0; i < memslot->npages; i++)
+			if (!test_bit(i, memslot->dirty_bitmap) &&
+			    kvm_mmu_test_shadow_dirty_mask(kvm, memslot, i))
+				set_bit(i, memslot->dirty_bitmap);
+	}
+	spin_unlock(&kvm->mmu_lock);
+
+	kvm->arch.dirty_logging_mode = KVM_DIRTY_LOG_MODE_WRPROT;
+}
+
+static void switch_dirty_log_mode_wrprot_to_dbit(struct kvm *kvm)
+{
+	struct kvm_memslots *slots = kvm_memslots(kvm);
+	struct kvm_memory_slot *memslot;
+
+	kvm_for_each_memslot(memslot, slots)
+		if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES)
+			kvm_mmu_slot_leaf_clear_dirty(kvm, memslot);
+
+	/*
+	 * No need to initiate a TLB flush here, since any page for which we
+	 * cleared the dirty bit above would already be marked in the dirty
+	 * bitmap. It isn't until the next get_dirty_log or enable_log_dirty
+	 * that the clearing of the dirty bits needs to be propagated
+	 * everywhere.
+	 */
+
+	kvm->arch.dirty_logging_mode = KVM_DIRTY_LOG_MODE_DBIT;
+
+	/*
+	 * As an optimization, we could also remove the write-protection from
+	 * all SPTEs here, rather than incurring faults as writes happen.
+	 */
+}
+
+int kvm_mmu_switch_dirty_log_mode(struct kvm *kvm, u8 mode)
+{
+	int err = 0;
+	u8 old_mode;
+
+	if (mode == KVM_DIRTY_LOG_MODE_DEFAULT)
+		mode = kvm_default_dirty_log_mode;
+
+	if (hweight8(mode) != 1)
+		return -EINVAL;
+
+	if (!(mode & kvm_supported_dirty_log_modes)) {
+		kvm_err("Dirty logging mode %u is not supported.\n", mode);
+		return -ENOTSUPP;
+	}
+
+	kvm_debug("Switching dirty logging mode from %u to %u.\n",
+		  kvm->arch.dirty_logging_mode, mode);
+
+	mutex_lock(&kvm->slots_lock);
+
+	old_mode = kvm->arch.dirty_logging_mode;
+
+	if (mode != old_mode) {
+		if (mode == KVM_DIRTY_LOG_MODE_WRPROT &&
+		    old_mode == KVM_DIRTY_LOG_MODE_DBIT)
+			switch_dirty_log_mode_dbit_to_wrprot(kvm);
+		else if (mode == KVM_DIRTY_LOG_MODE_DBIT &&
+			 old_mode == KVM_DIRTY_LOG_MODE_WRPROT)
+			switch_dirty_log_mode_wrprot_to_dbit(kvm);
+		else if (kvm_x86_ops->switch_dirty_log_mode)
+			err = kvm_x86_ops->switch_dirty_log_mode(kvm, mode);
+		else
+			err = -ENOTSUPP;
+	}
+
+	mutex_unlock(&kvm->slots_lock);
+
+	if (err)
+		kvm_err("Trying to switch dirty logging mode from "
+			"%u to %u failed with error %d.\n",
			old_mode, mode, err);
+
+	return err;
+}
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 6d39802a666d..b27dde010ec1 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -213,4 +213,5 @@ void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
 bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
 				    struct kvm_memory_slot *slot, u64 gfn);
 int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu);
+int kvm_mmu_switch_dirty_log_mode(struct kvm *kvm, u8 mode);
 #endif
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c6b0477c855e..232115b84fbb 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -7992,9 +7992,17 @@ static __init int hardware_setup(void)
 		kvm_x86_ops->slot_disable_log_dirty = NULL;
 		kvm_x86_ops->flush_log_dirty = NULL;
 		kvm_x86_ops->get_and_reset_log_dirty = NULL;
-	} else
+	} else {
 		kvm_default_dirty_log_mode = KVM_DIRTY_LOG_MODE_PML;
+
+		/*
+		 * Currently, switching between PML and other modes is not
+		 * supported, so if PML is enabled, it is the only available
+		 * mode.
+		 */
+		kvm_supported_dirty_log_modes = KVM_DIRTY_LOG_MODE_PML;
+	}
 
 	if (!cpu_has_vmx_preemption_timer())
 		kvm_x86_ops->request_immediate_exit =
 			__kvm_request_immediate_exit;
-- 
2.19.1.568.g152ad8e336-goog
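
Note for reviewers (not part of the patch): the mode-validation rules at the
top of kvm_mmu_switch_dirty_log_mode() can be modelled in a few lines of
user-space C, which may help when reasoning about the uapi values. The
KVM_DIRTY_LOG_MODE_* constants are copied from the uapi header above; the
default_mode/supported_modes variables are illustrative stand-ins for
kvm_default_dirty_log_mode and kvm_supported_dirty_log_modes, and the whole
snippet is a sketch rather than kernel code.

#include <stdio.h>

#define KVM_DIRTY_LOG_MODE_DEFAULT	0
#define KVM_DIRTY_LOG_MODE_WRPROT	1
#define KVM_DIRTY_LOG_MODE_DBIT	2
#define KVM_DIRTY_LOG_MODE_PML		4

/* Stand-ins for kvm_default_dirty_log_mode / kvm_supported_dirty_log_modes,
 * as they would look with D-bit logging available but PML disabled. */
static unsigned char default_mode = KVM_DIRTY_LOG_MODE_DBIT;
static unsigned char supported_modes = KVM_DIRTY_LOG_MODE_WRPROT |
				       KVM_DIRTY_LOG_MODE_DBIT;

/* Mirrors the checks at the top of kvm_mmu_switch_dirty_log_mode(). */
static int validate_mode(unsigned char mode)
{
	if (mode == KVM_DIRTY_LOG_MODE_DEFAULT)
		mode = default_mode;
	if (__builtin_popcount(mode) != 1)	/* hweight8(mode) != 1 */
		return -1;			/* -EINVAL in the kernel */
	if (!(mode & supported_modes))
		return -2;			/* -ENOTSUPP in the kernel */
	return mode;				/* resolved mode to switch to */
}

int main(void)
{
	/* DEFAULT resolves to DBIT, WRPROT is accepted, PML is rejected as
	 * unsupported, and a multi-bit request is rejected as invalid,
	 * printing "2 1 -2 -1". */
	printf("%d %d %d %d\n",
	       validate_mode(KVM_DIRTY_LOG_MODE_DEFAULT),
	       validate_mode(KVM_DIRTY_LOG_MODE_WRPROT),
	       validate_mode(KVM_DIRTY_LOG_MODE_PML),
	       validate_mode(KVM_DIRTY_LOG_MODE_WRPROT |
			     KVM_DIRTY_LOG_MODE_DBIT));
	return 0;
}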