Implement forcing of vcpu exits when the dirty list becomes full.

When a vcpu's dirty list fills up to within a safety margin of its
capacity (headroom is reserved for pages dirtied during the exit path
itself, e.g. the PML log flush), request that all vcpus exit to
userspace with the new exit reason KVM_EXIT_DIRTY_LOG_FULL so that
userspace can harvest the dirty logs. If the list fills while
interrupts are disabled, only the current vcpu is flagged (via
vcpu->need_exit) and the request to kick the remaining vcpus is
deferred to vcpu_enter_guest(), because kvm_make_all_cpus_request()
may deadlock when called with interrupts disabled.

Signed-off-by: Lei Cao <lei.cao@xxxxxxxxxxx>
---
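For reviewers, here is a minimal sketch of how a VMM's vcpu loop might
consume the new exit reason. This is not part of the patch;
harvest_dirty_lists() is a placeholder for the dirty-list draining
mechanism provided elsewhere in this series:

	#include <errno.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/*
	 * Placeholder: drain the per-vcpu gfn lists using the
	 * interfaces introduced earlier in this series.
	 */
	extern void harvest_dirty_lists(void);

	static int run_vcpu(int vcpu_fd, struct kvm_run *run)
	{
		for (;;) {
			int ret = ioctl(vcpu_fd, KVM_RUN, 0);

			/*
			 * This patch makes KVM_RUN return -EINTR with
			 * exit_reason == KVM_EXIT_DIRTY_LOG_FULL once
			 * the dirty list is nearly full.
			 */
			if (run->exit_reason == KVM_EXIT_DIRTY_LOG_FULL) {
				harvest_dirty_lists();
				continue;	/* re-enter the guest */
			}
			if (ret < 0 && errno != EINTR)
				return -1;	/* real error */

			/* ... handle the other exit reasons ... */
		}
	}

Since KVM_REQ_EXIT_DIRTY_LOG_FULL is broadcast to all vcpus, every
vcpu thread observes this exit; how the threads coordinate the actual
harvest is left to the VMM.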
 arch/x86/include/asm/kvm_host.h |  7 +++++++
 arch/x86/kvm/mmu.c              |  7 +++++++
 arch/x86/kvm/vmx.c              |  7 +++++++
 arch/x86/kvm/x86.c              | 10 ++++++++++
 include/linux/kvm_host.h        |  1 +
 include/uapi/linux/kvm.h        |  1 +
 virt/kvm/kvm_main.c             | 36 ++++++++++++++++++++++++++++++++++++
 7 files changed, 69 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6dfb14a..20a9fc8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -75,6 +75,7 @@
 #define KVM_REQ_HV_RESET          28
 #define KVM_REQ_HV_EXIT           29
 #define KVM_REQ_HV_STIMER         30
+#define KVM_REQ_EXIT_DIRTY_LOG_FULL 31
 
 #define CR0_RESERVED_BITS                                               \
 	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
@@ -997,6 +998,8 @@ struct kvm_x86_ops {
 	 *  - enable_log_dirty_pt_masked:
 	 *	called when reenabling log dirty for the GFNs in the mask after
 	 *	corresponding bits are cleared in slot->dirty_bitmap.
+	 *  - cpu_dirty_log_size:
+	 *	called to inquire about the size of the hardware dirty log
 	 */
 	void (*slot_enable_log_dirty)(struct kvm *kvm,
 				      struct kvm_memory_slot *slot);
@@ -1006,6 +1009,8 @@
 	void (*enable_log_dirty_pt_masked)(struct kvm *kvm,
 					   struct kvm_memory_slot *slot,
 					   gfn_t offset, unsigned long mask);
+	int (*cpu_dirty_log_size)(void);
+
 	/* pmu operations of sub-arch */
 	const struct kvm_pmu_ops *pmu_ops;
 
@@ -1388,6 +1393,8 @@ bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
 void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
 		     struct kvm_lapic_irq *irq);
 
+int kvm_mt_cpu_dirty_log_size(void);
+
 static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
 {
 	if (kvm_x86_ops->vcpu_blocking)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 7012de4..e0668a0 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4980,6 +4980,13 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, struct kvm_memslots *slots)
 	}
 }
 
+int kvm_mt_cpu_dirty_log_size(void)
+{
+	if (kvm_x86_ops->cpu_dirty_log_size)
+		return kvm_x86_ops->cpu_dirty_log_size();
+	return 0;
+}
+
 static unsigned long
 mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
 {
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ba20b00..76f88b0 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6729,6 +6729,7 @@ static __init int hardware_setup(void)
 		kvm_x86_ops->slot_disable_log_dirty = NULL;
 		kvm_x86_ops->flush_log_dirty = NULL;
 		kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
+		kvm_x86_ops->cpu_dirty_log_size = NULL;
 	}
 
 	if (cpu_has_vmx_preemption_timer() && enable_preemption_timer) {
@@ -11503,6 +11504,11 @@ static void vmx_setup_mce(struct kvm_vcpu *vcpu)
 			~FEATURE_CONTROL_LMCE;
 }
 
+static int vmx_cpu_dirty_log_size(void)
+{
+	return PML_ENTITY_NUM;
+}
+
 static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.cpu_has_kvm_support = cpu_has_kvm_support,
 	.disabled_by_bios = vmx_disabled_by_bios,
@@ -11617,6 +11623,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.slot_disable_log_dirty = vmx_slot_disable_log_dirty,
 	.flush_log_dirty = vmx_flush_log_dirty,
 	.enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
+	.cpu_dirty_log_size = vmx_cpu_dirty_log_size,
 
 	.pre_block = vmx_pre_block,
 	.post_block = vmx_post_block,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5707129..e2f4cee 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6714,6 +6714,16 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		 */
 		if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
 			kvm_hv_process_stimers(vcpu);
+		if (kvm_check_request(KVM_REQ_EXIT_DIRTY_LOG_FULL, vcpu)) {
+			vcpu->run->exit_reason = KVM_EXIT_DIRTY_LOG_FULL;
+			r = -EINTR;
+			if (vcpu->need_exit) {
+				vcpu->need_exit = false;
+				kvm_make_all_cpus_request(vcpu->kvm,
+					KVM_REQ_EXIT_DIRTY_LOG_FULL);
+			}
+			goto out;
+		}
 	}
 
 	/*
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7a85b30..b7fedeb 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -283,6 +283,7 @@ struct kvm_vcpu {
 	struct dentry *debugfs_dentry;
 
 #ifdef KVM_DIRTY_LOG_PAGE_OFFSET
 	struct gfn_list_t *dirty_logs;
+	bool need_exit;
 #endif
 };
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 05332de..bacb8db 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -205,6 +205,7 @@ struct kvm_hyperv_exit {
 #define KVM_EXIT_S390_STSI        25
 #define KVM_EXIT_IOAPIC_EOI       26
 #define KVM_EXIT_HYPERV           27
+#define KVM_EXIT_DIRTY_LOG_FULL   28
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index bff980c..00d7989 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -270,6 +270,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 		}
 		vcpu->dirty_logs = page_address(page);
 	}
+	vcpu->need_exit = false;
 #endif
 
 	kvm_vcpu_set_in_spin_loop(vcpu, false);
@@ -3030,6 +3031,29 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
 }
 
 #ifdef KVM_DIRTY_LOG_PAGE_OFFSET
+static void kvm_mt_dirty_log_full(struct kvm *kvm, struct kvm_vcpu *vcpu)
+{
+	/*
+	 * Request vcpu exits, but if interrupts are disabled, we have
+	 * to defer the requests because smp_call_xxx may deadlock when
+	 * called that way.
+	 */
+	if (vcpu && irqs_disabled()) {
+		kvm_make_request(KVM_REQ_EXIT_DIRTY_LOG_FULL, vcpu);
+		vcpu->need_exit = true;
+	} else {
+		WARN_ON(irqs_disabled());
+		kvm_make_all_cpus_request(kvm,
+					  KVM_REQ_EXIT_DIRTY_LOG_FULL);
+	}
+}
+
+/*
+ * estimated number of pages being dirtied during vcpu exit, not counting
+ * hardware dirty log (PML) flush
+ */
+#define KVM_MT_DIRTY_PAGE_NUM_EXTRA 128
+
 void kvm_mt_mark_page_dirty(struct kvm *kvm, struct kvm_memory_slot *slot,
 			    struct kvm_vcpu *vcpu, gfn_t gfn)
 {
@@ -3037,6 +3061,7 @@ void kvm_mt_mark_page_dirty(struct kvm *kvm, struct kvm_memory_slot *slot,
 	int slot_id;
 	u32 as_id = 0;
 	u64 offset;
+	u32 extra = KVM_MT_DIRTY_PAGE_NUM_EXTRA;
 
 	if (!slot || !slot->dirty_bitmap || !kvm->dirty_log_size)
 		return;
@@ -3068,6 +3093,17 @@ void kvm_mt_mark_page_dirty(struct kvm *kvm, struct kvm_memory_slot *slot,
 		gfnlist->dirty_gfns[gfnlist->dirty_index].offset = offset;
 		smp_wmb();
 		gfnlist->dirty_index++;
+
+		/*
+		 * more pages will be dirtied during vcpu exit, e.g. pml log
+		 * being flushed. So allow some buffer space.
+		 */
+		if (vcpu)
+			extra += kvm_mt_cpu_dirty_log_size();
+
+		if (gfnlist->dirty_index == (kvm->max_dirty_logs - extra))
+			kvm_mt_dirty_log_full(kvm, vcpu);
+
 	if (!vcpu)
 		spin_unlock(&kvm->dirty_log_lock);
 }
-- 
2.5.0