exit timing analysis v1 - comments&discussions welcome

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi everyone,
I wrote a patch that tries to get some data where we spent our time. This should give us some data to think about when we talk about virtualization on non hardware assisted platforms like 440.

A good example what this data might help us to discuss is the comparison between paravirtualized and non paravirtualized guest (using the magic page mechanism I posted recently). You see that the pv extension reduces the number of exits for instruction emulation by 50%, but unfortunately that only reduced the time we spend for the emulation by ~20%. This data is for you information and comments/discussions are welcome (I expect that we need to trace the data of the emulation per instr. to identify the slow ones and think about their optimization).

The basic approach is to read TB[U&L] when entering the guest and when exiting it. The time from exit->enter is the time we spent to handle the exit and accounted per exit type. The time enter->exit is the time we spent in the guest and accounted too. For details I attached the patch I used to measure the timing. I verified it using with serveral assertions I now removed in the final version like "testing for double accounting of an exit" or "testing if we missed to account an exit". Additionally I verified the exit counts with the data reported by our kvm_stat support and that matches. Also the sum of accounted time matches the real time spent to run the test (Test was booting a ubuntu guest, logon as root, executing a ls, kill guest).

I think this pretty much explains why the guest is sometimes feeling slow :-).
Handle this data as semi-experimental and let me know what you think.

NO PV support
unit is usec
traced duration 202,244,233 usec
count min max sum avg stddev time% EMULINST 6,423,699 12 247,582 91,732,705 14.2804 241.200 45.36 ITLBVIRT 1,777,242 21 264,257 47,116,557 26.5111 286.040 23.30 DTLBVIRT 1,544,241 22 263,947 41,765,447 27.0459 218.997 20.65 TIMEINGUEST 10,000,000 0 3,673 10,861,792 1.0862 4.790 5.37 MMIO 9,318 47 10,686 3,544,934 380.4394 507.292 1.75 EXTINT 3,523 33 4,475 2,557,183 725.8538 619.026 1.26 DEC 16,937 50 4,866 1,441,084 85.0850 203.856 0.71 DTLBREAL 95,497 13 105 1,349,255 14.1288 2.811 0.67 ITLBREAL 80,622 13 146 1,079,052 13.3841 2.870 0.53 DSI 22,498 11 99 249,554 11.0923 1.975 0.12 DCR 4,854 41 3,758 232,657 47.9310 55.802 0.12 ISI 11,167 11 67 124,210 11.1230 2.242 0.06 SYSCALL 9,459 11 100 113,972 12.0491 2.763 0.06 SIGNAL 635 66 6,025 72,438 114.0756 335.235 0.04 FP_UNAVAIL 308 11 12 3,393 11.0162 0.126 0.00
*never hit USR_PR_INST, HCALL, HALT, DTLBPV, DEBUG

With PV Support
unit is usec
traced duration 136,640,181 usec
count min max sum avg stddev time% EMULINST 3,160,786 12 247,587 50,808,972 16.0748 205.445 37.18 ITLBVIRT 1,333,725 21 264,336 35,094,300 26.3130 235.204 25.68 DTLBVIRT 1,212,505 22 16,753 32,382,302 26.7069 51.475 23.70 TIMEINGUEST 6,000,000 0 3,846 7259,180 1.2099 9.317 5.31 MMIO 9,955 47 12,289 3570,654 358.6795 539.759 2.61 EXTINT 2,380 31 5,285 1791,705 752.8172 665.021 1.31 ITLBREAL 80,690 13 247,582 1320,621 16.3666 871.535 0.97 DTLBREAL 93,213 13 108 1315,333 14.1110 2.799 0.96 DTLBPV 46,941 20 3,696 1100,399 23.4422 50.127 0.81 DEC 10,376 50 3,672 928,878 89.5218 217.384 0.68 SIGNAL 658 66 264,544 339,911 516.5821 10303.957 0.25 DSI 21,799 11 87 242,343 11.1172 2.227 0.18 DCR 5,042 41 258 233,095 46.2307 6.465 0.17 ISI 11,582 11 66 128,625 11.1056 2.133 0.09 SYSCALL 10,052 11 59 120,597 11.9973 2.415 0.09 FP_UNAVAIL 295 11 12 3,247 11.0068 0.082 0.00 HCALL 1 19 19 19 19.0000 0.000 0.00
*never hit USR_PR_INST, HALT, DEBUG


--

Grüsse / regards, Christian Ehrhardt
IBM Linux Technology Center, Open Virtualization

diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -346,6 +346,11 @@
 	DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
 	DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear));
 	DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr));
+
+	DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu, arch.timing_exit.tv32.tbu));
+	DEFINE(VCPU_TIMING_EXIT_TBL, offsetof(struct kvm_vcpu, arch.timing_exit.tv32.tbl));
+	DEFINE(VCPU_TIMING_LAST_ENTER_TBU, offsetof(struct kvm_vcpu, arch.timing_last_enter.tv32.tbu));
+	DEFINE(VCPU_TIMING_LAST_ENTER_TBL, offsetof(struct kvm_vcpu, arch.timing_last_enter.tv32.tbl));
 #endif
 
 	return 0;
diff --git a/arch/powerpc/kvm/booke_guest.c b/arch/powerpc/kvm/booke_guest.c
--- a/arch/powerpc/kvm/booke_guest.c
+++ b/arch/powerpc/kvm/booke_guest.c
@@ -56,6 +56,14 @@
 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
 	{ NULL }
 };
+
+u32 last_exit_type;
+u32 timing_count_type[__NUMBER_OF_KVM_EXIT_TYPES];
+u64 timing_sum_duration[__NUMBER_OF_KVM_EXIT_TYPES];
+u64 timing_sum_quad_duration[__NUMBER_OF_KVM_EXIT_TYPES];
+u64 timing_min_duration[__NUMBER_OF_KVM_EXIT_TYPES];
+u64 timing_max_duration[__NUMBER_OF_KVM_EXIT_TYPES];
+u64 timing_last_exit;
 
 static const u32 interrupt_msr_mask[16] = {
 	[BOOKE_INTERRUPT_CRITICAL]      = MSR_ME,
@@ -261,6 +269,9 @@
 	enum emulation_result er;
 	int r = RESUME_HOST;
 
+	/* update before a new last_exit_type is written */
+	update_exit_timing(&(vcpu->arch));
+
 	local_irq_enable();
 
 	run->exit_reason = KVM_EXIT_UNKNOWN;
@@ -285,10 +296,13 @@
 		 * misses before ceding control. */
 		if (need_resched())
 			cond_resched();
-		if (exit_nr == BOOKE_INTERRUPT_DECREMENTER)
+		if (exit_nr == BOOKE_INTERRUPT_DECREMENTER) {
 			vcpu->stat.dec_exits++;
-		else
+			set_last_exit_type(DEC_EXITS);
+		} else {
 			vcpu->stat.ext_intr_exits++;
+			set_last_exit_type(EXT_INTR_EXITS);
+		}
 		r = RESUME_GUEST;
 		break;
 
@@ -299,6 +313,7 @@
 			vcpu->arch.esr = vcpu->arch.fault_esr;
 			kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM);
 			r = RESUME_GUEST;
+			set_last_exit_type(USR_PR_INST);
 			break;
 		}
 
@@ -308,10 +323,12 @@
 			/* Future optimization: only reload non-volatiles if
 			 * they were actually modified by emulation. */
 			vcpu->stat.emulated_inst_exits++;
+			set_last_exit_type(EMULATED_INST_EXITS);
 			r = RESUME_GUEST_NV;
 			break;
 		case EMULATE_DO_DCR:
 			run->exit_reason = KVM_EXIT_DCR;
+			set_last_exit_type(DCR_EXITS);
 			r = RESUME_HOST;
 			break;
 		case EMULATE_FAIL:
@@ -331,6 +348,7 @@
 
 	case BOOKE_INTERRUPT_FP_UNAVAIL:
 		kvmppc_queue_exception(vcpu, exit_nr);
+		set_last_exit_type(FP_UNAVAIL);
 		r = RESUME_GUEST;
 		break;
 
@@ -339,6 +357,7 @@
 		vcpu->arch.esr = vcpu->arch.fault_esr;
 		kvmppc_queue_exception(vcpu, exit_nr);
 		vcpu->stat.dsi_exits++;
+		set_last_exit_type(DSI_EXITS);
 		r = RESUME_GUEST;
 		break;
 
@@ -346,6 +365,7 @@
 		vcpu->arch.esr = vcpu->arch.fault_esr;
 		kvmppc_queue_exception(vcpu, exit_nr);
 		vcpu->stat.isi_exits++;
+		set_last_exit_type(ISI_EXITS);
 		r = RESUME_GUEST;
 		break;
 
@@ -353,9 +373,11 @@
 		if (vcpu->arch.last_inst == KVM_HYPERCALL_BIN) {
 			kvmppc_do_hypercall(vcpu);
 			vcpu->stat.hcall_exits++;
+			set_last_exit_type(HCALL_EXITS);
 		} else {
 			kvmppc_queue_exception(vcpu, exit_nr);
 			vcpu->stat.syscall_exits++;
+			set_last_exit_type(SYSCALL_EXITS);
 		}
 		r = RESUME_GUEST;
 		break;
@@ -370,6 +392,7 @@
 			 vcpu->arch.pvmem_gpaddr >> KVM_PPCPV_MAGIC_PAGE_SHIFT,
 			 0, KVM_PPCPV_MAGIC_PAGE_FLAGS);
 			vcpu->stat.dtlb_pvmem_miss_exits++;
+			set_last_exit_type(DTLB_PVMEM_MISS_EXITS);
 			r = RESUME_GUEST;
 			break;
 		}
@@ -382,6 +405,7 @@
 			vcpu->arch.dear = vcpu->arch.fault_dear;
 			vcpu->arch.esr = vcpu->arch.fault_esr;
 			vcpu->stat.dtlb_real_miss_exits++;
+			set_last_exit_type(DTLB_REAL_MISS_EXITS);
 			r = RESUME_GUEST;
 			break;
 		}
@@ -399,11 +423,13 @@
 			kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid,
 			               gtlbe->word2);
 			vcpu->stat.dtlb_virt_miss_exits++;
+			set_last_exit_type(DTLB_VIRT_MISS_EXITS);
 			r = RESUME_GUEST;
 		} else {
 			/* Guest has mapped and accessed a page which is not
 			 * actually RAM. */
 			r = kvmppc_emulate_mmio(run, vcpu);
+			set_last_exit_type(MMIO_EXITS);
 		}
 
 		break;
@@ -422,10 +448,12 @@
 			/* The guest didn't have a mapping for it. */
 			kvmppc_queue_exception(vcpu, exit_nr);
 			vcpu->stat.itlb_real_miss_exits++;
+			set_last_exit_type(ITLB_REAL_MISS_EXITS);
 			break;
 		}
 
 		vcpu->stat.itlb_virt_miss_exits++;
+		set_last_exit_type(ITLB_VIRT_MISS_EXITS);
 
 		gfn = tlb_xlate(gtlbe, eaddr) >> PAGE_SHIFT;
 
@@ -458,6 +486,7 @@
 		mtspr(SPRN_DBSR, dbsr);
 
 		run->exit_reason = KVM_EXIT_DEBUG;
+		set_last_exit_type(DEBUG_EXITS);
 		r = RESUME_HOST;
 		break;
 	}
@@ -482,6 +511,7 @@
 			r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
 
 			vcpu->stat.signal_exits++;
+			set_last_exit_type(SIGNAL_EXITS);
 		} else {
 			vcpu->stat.light_exits++;
 		}
@@ -495,6 +525,7 @@
 			break;
 		case KVM_EXIT_INTR:
 			vcpu->stat.signal_exits++;
+			set_last_exit_type(SIGNAL_EXITS);
 			break;
 		}
 	}
@@ -535,6 +566,8 @@
 	 * real timebase frequency. Accordingly, it must see the state of
 	 * CCR1[TCS]. */
 	vcpu->arch.ccr1 = mfspr(SPRN_CCR1);
+
+	init_exit_timing(&(vcpu->arch));
 
 	return 0;
 }
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -109,6 +109,16 @@
 
 	li	r6, 1
 	slw	r6, r6, r5
+
+	/* save exit time */
+..exit_tbu_overflow_loop:
+	mfspr	r7, SPRN_TBRU
+	mfspr	r8, SPRN_TBRL
+	mfspr	r9, SPRN_TBRU
+	cmpw	r9, r7
+	bne	..exit_tbu_overflow_loop
+	stw	r8, VCPU_TIMING_EXIT_TBL(r4)
+	stw	r9, VCPU_TIMING_EXIT_TBU(r4)
 
 	/* Save the faulting instruction and all GPRs for emulation. */
 	andi.	r7, r6, NEED_INST_MASK
@@ -415,6 +425,17 @@
 	lwz	r3, VCPU_SPRG7(r4)
 	mtspr	SPRN_SPRG7, r3
 
+	/* save enter time */
+
+..enter_tbu_overflow_loop:
+	mfspr	r6, SPRN_TBRU
+	mfspr	r7, SPRN_TBRL
+	mfspr	r8, SPRN_TBRU
+	cmpw	r8, r6
+	bne	..enter_tbu_overflow_loop
+	stw	r7, VCPU_TIMING_LAST_ENTER_TBL(r4)
+	stw	r8, VCPU_TIMING_LAST_ENTER_TBU(r4)
+
 	/* Finish loading guest volatiles and jump to guest. */
 	lwz	r3, VCPU_CTR(r4)
 	mtctr	r3
diff --git a/include/asm-powerpc/kvm_host.h b/include/asm-powerpc/kvm_host.h
--- a/include/asm-powerpc/kvm_host.h
+++ b/include/asm-powerpc/kvm_host.h
@@ -25,6 +25,8 @@
 #include <linux/types.h>
 #include <linux/kvm_types.h>
 #include <asm/kvm_asm.h>
+#include <asm/time.h>
+#include <asm-generic/div64.h>
 
 #define KVM_MAX_VCPUS 1
 #define KVM_MEMORY_SLOTS 32
@@ -66,6 +68,53 @@
 	u32 halt_wakeup;
 };
 
+enum kvm_exit_types {
+	MMIO_EXITS,
+	DCR_EXITS,
+	SIGNAL_EXITS,
+	ITLB_REAL_MISS_EXITS,
+	ITLB_VIRT_MISS_EXITS,
+	DTLB_REAL_MISS_EXITS,
+	DTLB_PVMEM_MISS_EXITS,
+	DTLB_VIRT_MISS_EXITS,
+	SYSCALL_EXITS,
+	HCALL_EXITS,
+	ISI_EXITS,
+	DSI_EXITS,
+	EMULATED_INST_EXITS,
+	DEC_EXITS,
+	EXT_INTR_EXITS,
+	HALT_WAKEUP,
+	USR_PR_INST,
+	FP_UNAVAIL,
+	DEBUG_EXITS,
+	TIMEINGUEST,
+	__NUMBER_OF_KVM_EXIT_TYPES
+};
+
+static const char* kvm_exit_names[__NUMBER_OF_KVM_EXIT_TYPES] = {
+	"MMIO",
+	"DCR",
+	"SIGNAL",
+	"ITLBREAL",
+	"ITLBVIRT",
+	"DTLBREAL",
+	"DTLBPV",
+	"DTLBVIRT",
+	"SYSCALL",
+	"HCALL",
+	"ISI",
+	"DSI",
+	"EMULINST",
+	"DEC",
+	"EXTINT",
+	"HALT",
+	"USR_PR_INST",
+	"FP_UNAVAIL",
+	"DEBUG",
+	"TIMEINGUEST"
+};
+
 struct tlbe {
 	u32 tid; /* Only the low 8 bits are used. */
 	u32 word0;
@@ -75,6 +124,14 @@
 
 struct kvm_arch {
 };
+
+/* allow access to big endian 32bit upper/lower parts and 64bit var */
+typedef union {
+	u64 tv64;
+	struct {
+		u32 tbu, tbl;
+	} tv32;
+} exit_timing_t;
 
 struct kvm_vcpu_arch {
 	/* Unmodified copy of the guest's TLB. */
@@ -139,6 +196,10 @@
 	u32 dbcr0;
 	u32 dbcr1;
 
+	/* to trace exit timing */
+	exit_timing_t timing_exit;
+	exit_timing_t timing_last_enter;
+
 	u32 last_inst;
 	u32 fault_dear;
 	u32 fault_esr;
@@ -165,4 +226,121 @@
 	int singlestep;
 };
 
+/* helpers for instruction timing, allocated in arch/powerpc/kvm/powerpc.c */
+extern u32 last_exit_type;
+extern u32 timing_count_type[__NUMBER_OF_KVM_EXIT_TYPES];
+extern u64 timing_sum_duration[__NUMBER_OF_KVM_EXIT_TYPES];
+extern u64 timing_sum_quad_duration[__NUMBER_OF_KVM_EXIT_TYPES];
+extern u64 timing_min_duration[__NUMBER_OF_KVM_EXIT_TYPES];
+extern u64 timing_max_duration[__NUMBER_OF_KVM_EXIT_TYPES];
+extern u64 timing_last_exit;
+
+static inline void init_exit_timing(struct kvm_vcpu_arch* vcpu_arch)
+{
+	int i;
+	last_exit_type=0xDEAD;
+	for (i=0; i<__NUMBER_OF_KVM_EXIT_TYPES; i++) {
+		timing_count_type[i]=0;
+		timing_max_duration[i]=0;
+		timing_min_duration[i]=0xFFFFFFFF;
+		timing_sum_duration[i]=0;
+		timing_sum_quad_duration[i]=0;
+	}
+	vcpu_arch->timing_exit.tv64 = 0;
+	timing_last_exit = 0;
+	vcpu_arch->timing_last_enter.tv64 = 0;
+	printk(KERN_ERR"### initialized exit timing (print start timestamp to log) ###\n");
+}
+
+static inline void set_last_exit_type(int type)
+{
+	last_exit_type=type;
+}
+
+static inline void print_exit_timing(void)
+{
+	int i;
+	u64 min, max;
+
+	printk(KERN_ERR"### start exit timing statistics ###\n");
+	for (i=0; i<__NUMBER_OF_KVM_EXIT_TYPES; i++) {
+		if (timing_min_duration[i]==0xFFFFFFFF)
+			min = 0;
+		else
+			min = timing_min_duration[i];
+		if (timing_max_duration[i]==0)
+			max = 0;
+		else
+			max = timing_max_duration[i];
+
+		printk(KERN_ERR"exit type %2d (%12s) count %10d min %10lld max %10lld sum %20lld sum_quad %20lld\n",
+				i, kvm_exit_names[i], timing_count_type[i], timing_min_duration[i], timing_max_duration[i],
+				timing_sum_duration[i], timing_sum_quad_duration[i]);
+	}
+	printk(KERN_ERR"please post calc: avg = sum/count; stddev = sqrt((sum_quad/count)-(sum_dur/count)²)\n");
+	printk(KERN_ERR"### end exit timing statistics ###\n");
+}
+
+static inline void add_exit_timing(u64 duration, int type)
+{
+	u64 old; 
+	
+	do_div(duration, tb_ticks_per_usec);
+	if (unlikely(duration > 0xFFFFFFFF)) {
+		printk(KERN_ERR"%s - duration too big, **2 would overflow duration %lld max 4294967295 type %d exit # of type %d (tb_ticks_per_usec=%ld)\n",
+				__func__, duration, type, timing_count_type[type], tb_ticks_per_usec);
+		print_exit_timing();
+	}
+		
+
+	timing_count_type[type]++;
+
+	/* sum */
+	old = timing_sum_duration[type];
+	timing_sum_duration[type] += duration;
+	if (unlikely(old > timing_sum_duration[type])) {
+		printk(KERN_ERR"%s - wraparound adding sum of durations old %lld new %lld type %d exit # of type %d\n",
+				__func__, old, timing_sum_duration[type], type, timing_count_type[type]);
+		print_exit_timing();
+	}
+
+	/* square sum */
+	old = timing_sum_quad_duration[type];
+	timing_sum_quad_duration[type] += (duration*duration);
+	if (unlikely(old > timing_sum_quad_duration[type])) {
+		printk(KERN_ERR"%s - wraparound adding sum of squared durations old %lld new %lld type %d exit # of type %d\n",
+				__func__, old, timing_sum_quad_duration[type], type, timing_count_type[type]);
+		print_exit_timing();
+	}
+
+	/* set min/max */
+	if (unlikely(duration < timing_min_duration[type]))
+		timing_min_duration[type] = duration;
+	if (unlikely(duration > timing_max_duration[type]))
+		timing_max_duration[type] = duration;
+}
+
+static inline void update_exit_timing(struct kvm_vcpu_arch* vcpu_arch)
+{
+	u64 exit = timing_last_exit;
+	u64 enter = vcpu_arch->timing_last_enter.tv64;
+
+	/* save exit time to use it next exit when the related enter time is known */
+	timing_last_exit = vcpu_arch->timing_exit.tv64;
+
+	if (unlikely(last_exit_type==0xDEAD))
+		return; /* skip first incomplete enter/exit cycle */
+
+	/* update statistics for average and standard deviation */
+	add_exit_timing((enter - exit), last_exit_type);
+	/* enter -> timing_last_exit is time spent in guest - log this too */
+	add_exit_timing((timing_last_exit - enter), TIMEINGUEST);
+
+	/* report regularly every 100000 exits*/
+	if (unlikely(timing_count_type[TIMEINGUEST] % 1000000 == 0)) {
+		printk(KERN_ERR"%s - %d exits report timing statistics\n", __func__,timing_count_type[TIMEINGUEST]);
+		print_exit_timing();
+	}
+}
+
 #endif /* __POWERPC_KVM_HOST_H__ */

[Index of Archives]     [KVM Development]     [KVM ARM]     [KVM ia64]     [Linux Virtualization]     [Linux USB Devel]     [Linux Video]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [Big List of Linux Books]

  Powered by Linux