Implement a dirty page threshold which, when crossed, forces vcpus to
exit. Because buffering on the host is limited, checkpoint state must
be captured before too many pages have been dirtied; exceeding the
buffer space would effectively force the two sides to be broken apart
and resynchronized from scratch, and this "divergence" event is costly
to repair. An "emergency" stop is therefore needed: if VM execution
has not been stopped by the time a critical number of dirty pages is
reached, the vcpus exit with a new exit reason indicating that the
dirty log is full. The stop kicks in only after a predefined threshold
of dirty pages has been reached, and the threshold and buffer sizes
are chosen to make the "emergency stop" a rare event.
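For illustration, here is a minimal sketch (not part of the patch) of
how a userspace VMM's vcpu thread might consume the new exit reason.
It assumes vcpu_fd and the mmap'ed kvm_run area come from the usual
KVM_CREATE_VCPU/mmap setup; take_checkpoint() is a hypothetical
stand-in for the checkpoint capture done elsewhere in this series, and
the ioctl that arms the trigger (handled by
kvm_vm_ioctl_mt_dirty_trigger below) is introduced in another patch,
so it is not shown.

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

#ifndef KVM_EXIT_DIRTY_LOG_FULL
#define KVM_EXIT_DIRTY_LOG_FULL 28	/* value added by this patch */
#endif

/* Run one vcpu until an unrecoverable error; checkpoint on demand. */
static int vcpu_loop(int vcpu_fd, struct kvm_run *run,
		     void (*take_checkpoint)(void))
{
	for (;;) {
		if (ioctl(vcpu_fd, KVM_RUN, 0) < 0) {
			perror("KVM_RUN");
			return -1;
		}

		switch (run->exit_reason) {
		case KVM_EXIT_DIRTY_LOG_FULL:
			/*
			 * Emergency stop: the dirty-page count crossed
			 * the trigger.  Capture/drain checkpoint state
			 * now, then resume; otherwise the host-side
			 * buffers would overflow and force a full
			 * resynchronization.
			 */
			take_checkpoint();
			break;
		case KVM_EXIT_IO:
		case KVM_EXIT_MMIO:
			/* normal device emulation would go here */
			break;
		default:
			fprintf(stderr, "unhandled exit %u\n",
				run->exit_reason);
			return -1;
		}
	}
}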
Signed-off-by: Lei Cao <lei.cao@xxxxxxxxxxx>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/x86.c              | 11 ++++++++++
 include/linux/kvm_host.h        |  1 +
 include/uapi/linux/kvm.h        |  1 +
 virt/kvm/kvm_main.c             | 34 ++++++++++++++++++++++++++++++-
 5 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 52bff2b..2b43660 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -71,6 +71,7 @@
 #define KVM_REQ_HV_RESET 28
 #define KVM_REQ_HV_EXIT 29
 #define KVM_REQ_HV_STIMER 30
+#define KVM_REQ_EXIT_DIRTY_LOG_FULL 31
 
 #define CR0_RESERVED_BITS \
 	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9b7798c..deede71 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6489,6 +6489,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			r = 1;
 			goto out;
 		}
+		if (kvm_check_request(KVM_REQ_EXIT_DIRTY_LOG_FULL, vcpu)) {
+			vcpu->run->exit_reason = KVM_EXIT_DIRTY_LOG_FULL;
+			r = 0;
+			goto out;
+		}
 		if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
 			record_steal_time(vcpu);
 		if (kvm_check_request(KVM_REQ_SMI, vcpu))
@@ -6687,6 +6692,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 
+	if (vcpu->need_exit) {
+		vcpu->need_exit = false;
+		kvm_make_all_cpus_request(vcpu->kvm,
+					  KVM_REQ_EXIT_DIRTY_LOG_FULL);
+	}
+
 	/*
 	 * Profile KVM exit RIPs:
 	 */
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5793ecf..08bda35 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -257,6 +257,7 @@ struct kvm_vcpu {
 	} spin_loop;
 #endif
 	bool preempted;
+	bool need_exit;
 	struct kvm_vcpu_arch arch;
 };
 
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 736668d..97520c4 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -205,6 +205,7 @@ struct kvm_hyperv_exit {
 #define KVM_EXIT_S390_STSI 25
 #define KVM_EXIT_IOAPIC_EOI 26
 #define KVM_EXIT_HYPERV 27
+#define KVM_EXIT_DIRTY_LOG_FULL 28
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index ba99cbc6..e22d7f4 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2006,6 +2006,25 @@ static void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot,
 	}
 }
 
+static void check_dirty_trigger(struct kvm *kvm, struct kvm_vcpu *vcpu,
+				int count)
+{
+	if (count > kvm->mt.dirty_trigger) {
+		/*
+		 * Request vcpu exits, but if interrupts are disabled, defer
+		 * the request: smp_call_function_many() may deadlock when
+		 * called with interrupts disabled.
+		 */
+		if (vcpu && irqs_disabled()) {
+			vcpu->need_exit = true;
+		} else {
+			WARN_ON(irqs_disabled());
+			kvm_make_all_cpus_request(kvm,
+					KVM_REQ_EXIT_DIRTY_LOG_FULL);
+		}
+	}
+}
+
 /*
  * We have some new dirty pages for our sublist waiter. Enough to merit
  * waking it up?
@@ -2079,6 +2098,7 @@ static void mt_mark_page_dirty(struct kvm *kvm, struct kvm_memory_slot *slot,
 	if ((gfnlist->dirty_index % DIRTY_GFN_ADD_GRANULARITY) == 0) {
 		spin_lock(&kvm->mt.lock);
 		kvm->mt.tot_pages += DIRTY_GFN_ADD_GRANULARITY;
+		check_dirty_trigger(kvm, vcpu, kvm->mt.tot_pages);
 		mt_sw_add_pages(kvm);
 		spin_unlock(&kvm->mt.lock);
 	}
@@ -2433,6 +2453,8 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
 	if (IS_ERR(vcpu))
 		return PTR_ERR(vcpu);
 
+	vcpu->need_exit = false;
+
 	preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);
 
 	r = kvm_arch_vcpu_setup(vcpu);
@@ -3627,7 +3649,17 @@ static int kvm_vm_ioctl_mt_sublist_fetch(struct kvm *kvm,
 
 static int kvm_vm_ioctl_mt_dirty_trigger(struct kvm *kvm, int dirty_trigger)
 {
-	return -EINVAL;
+	if (!kvm->mt.gfn_list.dirty_gfns)
+		return -EINVAL;
+
+	if (kvm->mt.gfn_list.max_dirty < dirty_trigger)
+		return -EINVAL;
+
+	kvm->mt.dirty_trigger = dirty_trigger;
+
+	check_dirty_trigger(kvm, NULL, kvm->mt.tot_pages);
+
+	return 0;
 }
 
 static long kvm_vm_ioctl(struct file *filp,
-- 
2.5.0