Implement forcing of vcpu exits when the dirty list becomes full.

When a vcpu's dirty list fills up to within a safety margin of its
capacity (headroom is reserved for pages dirtied during the exit path
itself, e.g. the PML log flush), request that all vcpus exit to
userspace with the new exit reason KVM_EXIT_DIRTY_LOG_FULL so that
userspace can harvest the dirty logs. If the list fills while
interrupts are disabled, only the current vcpu is flagged (via
vcpu->need_exit) and the request to kick the remaining vcpus is
deferred to vcpu_enter_guest(), because kvm_make_all_cpus_request()
may deadlock when called with interrupts disabled.

Signed-off-by: Lei Cao <lei.cao@xxxxxxxxxxx>
---
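For reviewers, here is a minimal sketch of how a VMM's vcpu loop might
consume the new exit reason. This is not part of the patch;
harvest_dirty_lists() is a placeholder for the dirty-list draining
mechanism provided elsewhere in this series:

	#include <errno.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/*
	 * Placeholder: drain the per-vcpu gfn lists using the
	 * interfaces introduced earlier in this series.
	 */
	extern void harvest_dirty_lists(void);

	static int run_vcpu(int vcpu_fd, struct kvm_run *run)
	{
		for (;;) {
			int ret = ioctl(vcpu_fd, KVM_RUN, 0);

			/*
			 * This patch makes KVM_RUN return -EINTR with
			 * exit_reason == KVM_EXIT_DIRTY_LOG_FULL once
			 * the dirty list is nearly full.
			 */
			if (run->exit_reason == KVM_EXIT_DIRTY_LOG_FULL) {
				harvest_dirty_lists();
				continue;	/* re-enter the guest */
			}
			if (ret < 0 && errno != EINTR)
				return -1;	/* real error */

			/* ... handle the other exit reasons ... */
		}
	}

Since KVM_REQ_EXIT_DIRTY_LOG_FULL is broadcast to all vcpus, every
vcpu thread observes this exit; how the threads coordinate the actual
harvest is left to the VMM.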
 arch/x86/include/asm/kvm_host.h |  7 +++++++
 arch/x86/kvm/mmu.c              |  7 +++++++
 arch/x86/kvm/vmx.c              |  7 +++++++
 arch/x86/kvm/x86.c              | 10 ++++++++++
 include/linux/kvm_host.h        |  1 +
 include/uapi/linux/kvm.h        |  1 +
 virt/kvm/kvm_main.c             | 36 ++++++++++++++++++++++++++++++++++++
 7 files changed, 69 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6dfb14a..20a9fc8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -75,6 +75,7 @@
 #define KVM_REQ_HV_RESET          28
 #define KVM_REQ_HV_EXIT           29
 #define KVM_REQ_HV_STIMER         30
+#define KVM_REQ_EXIT_DIRTY_LOG_FULL 31
 
 #define CR0_RESERVED_BITS                                               \
 	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
@@ -997,6 +998,8 @@ struct kvm_x86_ops {
 	 *  - enable_log_dirty_pt_masked:
 	 *	called when reenabling log dirty for the GFNs in the mask after
 	 *	corresponding bits are cleared in slot->dirty_bitmap.
+	 *  - cpu_dirty_log_size:
+	 *	called to inquire about the size of the hardware dirty log
 	 */
 	void (*slot_enable_log_dirty)(struct kvm *kvm,
 				      struct kvm_memory_slot *slot);
@@ -1006,6 +1009,8 @@
 	void (*enable_log_dirty_pt_masked)(struct kvm *kvm,
 					   struct kvm_memory_slot *slot,
 					   gfn_t offset, unsigned long mask);
+	int (*cpu_dirty_log_size)(void);
+
 	/* pmu operations of sub-arch */
 	const struct kvm_pmu_ops *pmu_ops;
 
@@ -1388,6 +1393,8 @@ bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
 void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
 		     struct kvm_lapic_irq *irq);
 
+int kvm_mt_cpu_dirty_log_size(void);
+
 static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
 {
 	if (kvm_x86_ops->vcpu_blocking)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 7012de4..e0668a0 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4980,6 +4980,13 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, struct kvm_memslots *slots)
 	}
 }
 
+int kvm_mt_cpu_dirty_log_size(void)
+{
+	if (kvm_x86_ops->cpu_dirty_log_size)
+		return kvm_x86_ops->cpu_dirty_log_size();
+	return 0;
+}
+
 static unsigned long
 mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
 {
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ba20b00..76f88b0 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6729,6 +6729,7 @@ static __init int hardware_setup(void)
 		kvm_x86_ops->slot_disable_log_dirty = NULL;
 		kvm_x86_ops->flush_log_dirty = NULL;
 		kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
+		kvm_x86_ops->cpu_dirty_log_size = NULL;
 	}
 
 	if (cpu_has_vmx_preemption_timer() && enable_preemption_timer) {
@@ -11503,6 +11504,11 @@ static void vmx_setup_mce(struct kvm_vcpu *vcpu)
 			~FEATURE_CONTROL_LMCE;
 }
 
+static int vmx_cpu_dirty_log_size(void)
+{
+	return PML_ENTITY_NUM;
+}
+
 static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.cpu_has_kvm_support = cpu_has_kvm_support,
 	.disabled_by_bios = vmx_disabled_by_bios,
@@ -11617,6 +11623,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.slot_disable_log_dirty = vmx_slot_disable_log_dirty,
 	.flush_log_dirty = vmx_flush_log_dirty,
 	.enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
+	.cpu_dirty_log_size = vmx_cpu_dirty_log_size,
 
 	.pre_block = vmx_pre_block,
 	.post_block = vmx_post_block,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5707129..e2f4cee 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6714,6 +6714,16 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		 */
 		if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
 			kvm_hv_process_stimers(vcpu);
+		if (kvm_check_request(KVM_REQ_EXIT_DIRTY_LOG_FULL, vcpu)) {
+			vcpu->run->exit_reason = KVM_EXIT_DIRTY_LOG_FULL;
+			r = -EINTR;
+			if (vcpu->need_exit) {
+				vcpu->need_exit = false;
+				kvm_make_all_cpus_request(vcpu->kvm,
+					KVM_REQ_EXIT_DIRTY_LOG_FULL);
+			}
+			goto out;
+		}
 	}
 
 	/*
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7a85b30..b7fedeb 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -283,6 +283,7 @@ struct kvm_vcpu {
 	struct dentry *debugfs_dentry;
 
 #ifdef KVM_DIRTY_LOG_PAGE_OFFSET
 	struct gfn_list_t *dirty_logs;
+	bool need_exit;
 #endif
 };
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 05332de..bacb8db 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -205,6 +205,7 @@ struct kvm_hyperv_exit {
 #define KVM_EXIT_S390_STSI        25
 #define KVM_EXIT_IOAPIC_EOI       26
 #define KVM_EXIT_HYPERV           27
+#define KVM_EXIT_DIRTY_LOG_FULL   28
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index bff980c..00d7989 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -270,6 +270,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 		}
 		vcpu->dirty_logs = page_address(page);
 	}
+	vcpu->need_exit = false;
 #endif
 
 	kvm_vcpu_set_in_spin_loop(vcpu, false);
@@ -3030,6 +3031,29 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
 }
 
 #ifdef KVM_DIRTY_LOG_PAGE_OFFSET
+static void kvm_mt_dirty_log_full(struct kvm *kvm, struct kvm_vcpu *vcpu)
+{
+	/*
+	 * Request vcpu exits, but if interrupts are disabled, we have
+	 * to defer the requests because smp_call_xxx may deadlock when
+	 * called that way.
+	 */
+	if (vcpu && irqs_disabled()) {
+		kvm_make_request(KVM_REQ_EXIT_DIRTY_LOG_FULL, vcpu);
+		vcpu->need_exit = true;
+	} else {
+		WARN_ON(irqs_disabled());
+		kvm_make_all_cpus_request(kvm,
+					  KVM_REQ_EXIT_DIRTY_LOG_FULL);
+	}
+}
+
+/*
+ * estimated number of pages being dirtied during vcpu exit, not counting
+ * hardware dirty log (PML) flush
+ */
+#define KVM_MT_DIRTY_PAGE_NUM_EXTRA 128
+
 void kvm_mt_mark_page_dirty(struct kvm *kvm, struct kvm_memory_slot *slot,
 			    struct kvm_vcpu *vcpu, gfn_t gfn)
 {
@@ -3037,6 +3061,7 @@ void kvm_mt_mark_page_dirty(struct kvm *kvm, struct kvm_memory_slot *slot,
 	int slot_id;
 	u32 as_id = 0;
 	u64 offset;
+	u32 extra = KVM_MT_DIRTY_PAGE_NUM_EXTRA;
 
 	if (!slot || !slot->dirty_bitmap || !kvm->dirty_log_size)
 		return;
@@ -3068,6 +3093,17 @@ void kvm_mt_mark_page_dirty(struct kvm *kvm, struct kvm_memory_slot *slot,
 		gfnlist->dirty_gfns[gfnlist->dirty_index].offset = offset;
 		smp_wmb();
 		gfnlist->dirty_index++;
+
+		/*
+		 * more pages will be dirtied during vcpu exit, e.g. pml log
+		 * being flushed. So allow some buffer space.
+		 */
+		if (vcpu)
+			extra += kvm_mt_cpu_dirty_log_size();
+
+		if (gfnlist->dirty_index == (kvm->max_dirty_logs - extra))
+			kvm_mt_dirty_log_full(kvm, vcpu);
+
 	if (!vcpu)
 		spin_unlock(&kvm->dirty_log_lock);
 }
-- 
2.5.0