Hello, all.
The PLE handler attempts to determine an alternate vCPU to schedule. In
some cases the wrong vCPU is scheduled and performance suffers.
This patch allows for the guest OS to signal, using a hypercall, that
it's starting/ending a critical section. Using this information in the
PLE handler allows for a more intelligent VCPU scheduling determination
to be made. The patch only changes the PLE behaviour if this new
hypercall mechanism is used; if it isn't used, then the existing PLE
algorithm continues to be used to determine the next vCPU.
Benefit from the patch:
- The guest OS real-time performance is significantly improved when
hypercalls are used to mark entering and leaving guest OS kernel state.
- The guest OS system clock jitter, measured on an Intel E5 2620, was
reduced from 400ms down to 6ms.
- The guest OS system clock is set to a 2ms clock interrupt. The jitter
is measured as the difference between the dtsc() value read in the clock
interrupt handler and the expected tsc value.
- Details of the test report are attached for reference.
Patch details:
From 77edfa193a4e29ab357ec3b1e097f8469d418507 Mon Sep 17 00:00:00 2001
From: Bin BL LI <bin.bl.li@xxxxxxxxxxxxxxxxxx>
Date: Mon, 3 Mar 2014 11:23:35 -0500
Subject: [PATCH] Initial commit
---
arch/x86/kvm/x86.c | 7 +++++++
include/linux/kvm_host.h | 16 ++++++++++++++++
include/uapi/linux/kvm_para.h | 2 ++
virt/kvm/kvm_main.c | 14 +++++++++++++-
4 files changed, 38 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 39c28f0..e735de3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5582,6 +5582,7 @@ void kvm_arch_exit(void)
int kvm_emulate_halt(struct kvm_vcpu *vcpu)
{
++vcpu->stat.halt_exits;
+ kvm_vcpu_set_holding_lock(vcpu,false);
if (irqchip_in_kernel(vcpu->kvm)) {
vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
return 1;
@@ -5708,6 +5709,12 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1);
ret = 0;
break;
+ case KVM_HC_LOCK_GET:
+ kvm_vcpu_set_holding_lock(vcpu,true);
+ break;
+ case KVM_HC_LOCK_RELEASE:
+ kvm_vcpu_set_holding_lock(vcpu,false);
+ break;
default:
ret = -KVM_ENOSYS;
break;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b8e9a43..f24892e 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -266,6 +266,7 @@ struct kvm_vcpu {
bool in_spin_loop;
bool dy_eligible;
} spin_loop;
+ bool holding_lock;
#endif
bool preempted;
struct kvm_vcpu_arch arch;
@@ -403,6 +404,10 @@ struct kvm {
#endif
long tlbs_dirty;
struct list_head devices;
+
+#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
+ bool using_lock_flag;
+#endif
};
#define kvm_err(fmt, ...) \
@@ -1076,6 +1081,13 @@ static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val)
vcpu->spin_loop.dy_eligible = val;
}
+static inline void kvm_vcpu_set_holding_lock(struct kvm_vcpu *vcpu, bool val)
+{
+ if ( ! vcpu->kvm->using_lock_flag )
+ vcpu->kvm->using_lock_flag = true;
+ vcpu->holding_lock = val;
+}
+
#else /* !CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */
static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
@@ -1085,6 +1097,10 @@ static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val)
{
}
+
+static inline void kvm_vcpu_set_holding_lock(struct kvm_vcpu *vcpu, bool val)
+{
+}
#endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */
#endif
diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h
index 2841f86..2c563a1 100644
--- a/include/uapi/linux/kvm_para.h
+++ b/include/uapi/linux/kvm_para.h
@@ -20,6 +20,8 @@
#define KVM_HC_FEATURES 3
#define KVM_HC_PPC_MAP_MAGIC_PAGE 4
#define KVM_HC_KICK_CPU 5
+#define KVM_HC_LOCK_GET 6
+#define KVM_HC_LOCK_RELEASE 7
/*
* hypercalls use architecture specific
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 03a0381..c3a5046 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -232,6 +232,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
kvm_vcpu_set_in_spin_loop(vcpu, false);
kvm_vcpu_set_dy_eligible(vcpu, false);
+ kvm_vcpu_set_holding_lock(vcpu, false);
vcpu->preempted = false;
r = kvm_arch_vcpu_init(vcpu);
@@ -502,6 +503,10 @@ static struct kvm *kvm_create_vm(unsigned long type)
list_add(&kvm->vm_list, &vm_list);
spin_unlock(&kvm_lock);
+#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
+ kvm->using_lock_flag = false;
+#endif
+
return kvm;
out_err:
@@ -1762,9 +1767,16 @@ static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
bool eligible;
- eligible = !vcpu->spin_loop.in_spin_loop ||
+ if ( ! vcpu->kvm->using_lock_flag )
+ {
+ eligible = !vcpu->spin_loop.in_spin_loop ||
(vcpu->spin_loop.in_spin_loop &&
vcpu->spin_loop.dy_eligible);
+ }
+ else
+ {
+ eligible = vcpu->holding_lock; /* if holding any lock, yield to it */
+ }
if (vcpu->spin_loop.in_spin_loop)
kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible);
--
1.7.1
~/ref/kvm_git >
Regards
Bin
System clock jitter measure result.
Using an 8-vCPU SMP guest OS that uses the hypercall to mark entering/leaving
guest OS kernel state, together with the KVM patch to boost the lock-holder vCPU.
The max jitter at run time is 6ms
The two ~200ms jitter samples below were from the guest OS system
initialization stage, not run-time clock jitter.
======================================================================
11-> sysClkIntShow
OSclock:0x80f8760
-- intrlen: 6804074 [ 1999 us]
-- tickLen: 34020374 [ 9999 us]
-- tickWin: 5103056 [ 1499 us]
-- syncOff: 0 [ 0 us]
-- timeerr: -17387492 [ -5.111 ms] [0.0004 %] [sync: -0.071 ms]
-- intrerr: 0xffffffff99f36b1a [ -0.503 sec] [0.0404 %]
+---------+--------------------+--------------------+-------------------------+
| ts | init | last | time [ms] |
+---------+--------------------+--------------------+-------------------------+
| cpu_tsc | 0x000000031e754598 | 0x000003de99c1c6f8 | 1246665.096 |
| vxticks | 0x00000000ffff1613 | 0x000000000000fd0d | 1246659.985 |
+---------+--------------------+--------------------+-------------------------+
+---------+--------------------+--------------------+-------------------------+
| counter | count | time [ms] | delta [ms] [%] |
+---------+--------------------+--------------------+---------------+---------+
| cpu_tsc | 4241201332576 | 1246665.096 | +0.000 | +0.0000 |
| vxticks | 124666 | 1246659.985 | -5.111 | -0.0004 |
| clk_obj | 124666 | 1246659.985 | +5.040 | +0.0004 |
| clkintr | 623081 | 1246161.839 | -503.257 | -0.0404 |
+---------+--------------------+--------------------+---------------+---------+
OSclock [0x80f8760] interrupt-source histogram
-- clk freq: 500 Hz
-- clk intr: 623082
+------+---------------------+----------------------------+
| pos# | Interval [ms] | clk ticks % |
+------+---------------------+-------------------+--------+
| 0 | 0.000 .. 0.308 | 40 | 0.01 |
| 1 | 0.308 .. 0.616 | 16 | 0.00 |
| 2 | 0.616 .. 0.925 | 22 | 0.00 |
| 3 | 0.925 .. 1.233 | 14 | 0.00 |
| 4 | 1.233 .. 1.541 | 16 | 0.00 |
| 5 | 1.541 .. 1.849 | 51 | 0.01 |
| 6 | 1.849 .. 2.158 | 622769 | 99.95 |
| 7 | 2.158 .. 2.466 | 43 | 0.01 |
| 8 | 2.466 .. 2.774 | 13 | 0.00 |
| 9 | 2.774 .. 3.082 | 13 | 0.00 |
| 10 | 3.082 .. 3.390 | 17 | 0.00 |
| 11 | 3.390 .. 3.699 | 11 | 0.00 |
| 12 | 3.699 .. 4.007 | 4 | 0.00 |
| 13 | 4.007 .. 4.315 | 5 | 0.00 |
| 14 | 4.315 .. 4.623 | 3 | 0.00 |
| 15 | 4.623 .. 4.932 | 2 | 0.00 |
| 16 | 4.932 .. 5.240 | 1 | 0.00 |
| 17 | 5.240 .. 5.548 | 5 | 0.00 |
| 18 | 5.548 .. 5.856 | 26 | 0.00 |
| 19 | 5.856 .. 6.164 | 8 | 0.00 |
| 20 | 6.164 .. 6.473 | 2 | 0.00 |
| 663 | 204.350 .. 204.658 | 1 | 0.00 |
| 668 | 205.891 .. 206.199 | 1 | 0.00 |
+------+---------------------+-------------------+--------+
| ---- | | 623083 | 100.00 |
+------+---------------------+-------------------+--------+
System clock jitter measure result.
Using 8 vCpu SMP guest OS without hypercall and the vanilla KVM PLE handler.
The system clock jitter (run time) in the guest OS could be bigger than 400ms.
======================================================================
11-> sysClkIntShow
OSclock:0x8055760
-- intrlen: 6804067 [ 1999 us]
-- tickLen: 34020337 [ 10000 us]
-- tickWin: 5103050 [ 1499 us]
-- syncOff: 0 [ 0 us]
-- timeerr: -22314665 [ -6.559 ms] [0.0009 %] [sync: -0.032 ms]
-- intrerr: 0xfffffffaca02d1f3 [ -6.579 sec] [0.9233 %]
+---------+--------------------+--------------------+-------------------------+
| ts | init | last | time [ms] |
+---------+--------------------+--------------------+-------------------------+
| cpu_tsc | 0x00000003359de5e8 | 0x000002379389f1d2 | 712496.568 |
| vxticks | 0x00000000ffff160f | 0x0000000000002c60 | 712490.008 |
+---------+--------------------+--------------------+-------------------------+
+---------+--------------------+--------------------+-------------------------+
| counter | count | time [ms] | delta [ms] [%] |
+---------+--------------------+--------------------+---------------+---------+
| cpu_tsc | 2423937305578 | 712496.568 | +0.000 | +0.0000 |
| vxticks | 71249 | 712490.008 | -6.559 | -0.0009 |
| clk_obj | 71249 | 712490.008 | +6.527 | +0.0009 |
| clkintr | 352959 | 705917.967 | -6578.601 | -0.9233 |
+---------+--------------------+--------------------+---------------+---------+
OSclock [0x8055760] interrupt-source histogram
-- clk freq: 500 Hz
-- clk intr: 352959
+------+---------------------+----------------------------+
| pos# | Interval [ms] | clk ticks % |
+------+---------------------+-------------------+--------+
| 0 | 0.000 .. 0.308 | 270 | 0.08 |
| 1 | 0.308 .. 0.616 | 160 | 0.05 |
| 2 | 0.616 .. 0.925 | 165 | 0.05 |
| 3 | 0.925 .. 1.233 | 200 | 0.06 |
| 4 | 1.233 .. 1.541 | 182 | 0.05 |
| 5 | 1.541 .. 1.849 | 591 | 0.17 |
| 6 | 1.849 .. 2.158 | 349872 | 99.13 |
| 7 | 2.158 .. 2.466 | 530 | 0.15 |
| 8 | 2.466 .. 2.774 | 151 | 0.04 |
| 9 | 2.774 .. 3.082 | 123 | 0.03 |
| 10 | 3.082 .. 3.390 | 87 | 0.02 |
| 11 | 3.390 .. 3.699 | 65 | 0.02 |
| 12 | 3.699 .. 4.007 | 53 | 0.02 |
| 13 | 4.007 .. 4.315 | 38 | 0.01 |
| 14 | 4.315 .. 4.623 | 27 | 0.01 |
| 15 | 4.623 .. 4.932 | 34 | 0.01 |
| 16 | 4.932 .. 5.240 | 44 | 0.01 |
| 17 | 5.240 .. 5.548 | 24 | 0.01 |
| 18 | 5.548 .. 5.856 | 37 | 0.01 |
| 19 | 5.856 .. 6.164 | 32 | 0.01 |
| 20 | 6.164 .. 6.473 | 22 | 0.01 |
| 21 | 6.473 .. 6.781 | 30 | 0.01 |
| 22 | 6.781 .. 7.089 | 21 | 0.01 |
| 23 | 7.089 .. 7.397 | 12 | 0.00 |
| 24 | 7.397 .. 7.706 | 17 | 0.00 |
| 25 | 7.706 .. 8.014 | 13 | 0.00 |
| 26 | 8.014 .. 8.322 | 3 | 0.00 |
| 27 | 8.322 .. 8.630 | 9 | 0.00 |
| 28 | 8.630 .. 8.938 | 7 | 0.00 |
| 29 | 8.938 .. 9.247 | 7 | 0.00 |
| 30 | 9.247 .. 9.555 | 3 | 0.00 |
| 31 | 9.555 .. 9.863 | 2 | 0.00 |
| 32 | 9.863 .. 10.171 | 8 | 0.00 |
| 33 | 10.171 .. 10.479 | 6 | 0.00 |
| 34 | 10.479 .. 10.788 | 1 | 0.00 |
| 35 | 10.788 .. 11.096 | 3 | 0.00 |
| 36 | 11.096 .. 11.404 | 6 | 0.00 |
| 37 | 11.404 .. 11.712 | 1 | 0.00 |
| 38 | 11.712 .. 12.021 | 1 | 0.00 |
| 39 | 12.021 .. 12.329 | 2 | 0.00 |
| 40 | 12.329 .. 12.637 | 2 | 0.00 |
| 41 | 12.637 .. 12.945 | 3 | 0.00 |
| 42 | 12.945 .. 13.253 | 4 | 0.00 |
| 44 | 13.562 .. 13.870 | 2 | 0.00 |
| 45 | 13.870 .. 14.178 | 3 | 0.00 |
| 46 | 14.178 .. 14.486 | 2 | 0.00 |
| 47 | 14.486 .. 14.795 | 5 | 0.00 |
| 48 | 14.795 .. 15.103 | 3 | 0.00 |
| 49 | 15.103 .. 15.411 | 1 | 0.00 |
| 50 | 15.411 .. 15.719 | 2 | 0.00 |
| 51 | 15.719 .. 16.027 | 3 | 0.00 |
| 53 | 16.336 .. 16.644 | 2 | 0.00 |
| 54 | 16.644 .. 16.952 | 2 | 0.00 |
| 56 | 17.260 .. 17.569 | 2 | 0.00 |
| 57 | 17.569 .. 17.877 | 1 | 0.00 |
| 58 | 17.877 .. 18.185 | 1 | 0.00 |
| 60 | 18.493 .. 18.801 | 1 | 0.00 |
| 62 | 19.110 .. 19.418 | 1 | 0.00 |
| 64 | 19.726 .. 20.034 | 1 | 0.00 |
| 65 | 20.034 .. 20.343 | 1 | 0.00 |
| 66 | 20.343 .. 20.651 | 1 | 0.00 |
| 67 | 20.651 .. 20.959 | 1 | 0.00 |
| 71 | 21.884 .. 22.192 | 2 | 0.00 |
| 75 | 23.117 .. 23.425 | 1 | 0.00 |
| 76 | 23.425 .. 23.733 | 1 | 0.00 |
| 81 | 24.966 .. 25.274 | 2 | 0.00 |
| 82 | 25.274 .. 25.582 | 1 | 0.00 |
| 83 | 25.582 .. 25.891 | 3 | 0.00 |
| 85 | 26.199 .. 26.507 | 1 | 0.00 |
| 87 | 26.815 .. 27.123 | 1 | 0.00 |
| 90 | 27.740 .. 28.048 | 1 | 0.00 |
| 91 | 28.048 .. 28.356 | 2 | 0.00 |
| 99 | 30.514 .. 30.822 | 1 | 0.00 |
| 101 | 31.130 .. 31.438 | 1 | 0.00 |
| 107 | 32.980 .. 33.288 | 1 | 0.00 |
| 111 | 34.212 .. 34.521 | 1 | 0.00 |
| 119 | 36.678 .. 36.986 | 1 | 0.00 |
| 120 | 36.986 .. 37.295 | 1 | 0.00 |
| 122 | 37.603 .. 37.911 | 1 | 0.00 |
| 128 | 39.452 .. 39.760 | 1 | 0.00 |
| 129 | 39.760 .. 40.069 | 1 | 0.00 |
| 130 | 40.069 .. 40.377 | 1 | 0.00 |
| 137 | 42.226 .. 42.534 | 1 | 0.00 |
| 138 | 42.534 .. 42.843 | 1 | 0.00 |
| 140 | 43.151 .. 43.459 | 1 | 0.00 |
| 144 | 44.384 .. 44.692 | 1 | 0.00 |
| 161 | 49.623 .. 49.932 | 1 | 0.00 |
| 164 | 50.548 .. 50.856 | 1 | 0.00 |
| 182 | 56.096 .. 56.404 | 1 | 0.00 |
| 189 | 58.254 .. 58.562 | 1 | 0.00 |
| 192 | 59.178 .. 59.487 | 1 | 0.00 |
| 214 | 65.959 .. 66.267 | 1 | 0.00 |
| 230 | 70.891 .. 71.199 | 1 | 0.00 |
| 258 | 79.521 .. 79.829 | 1 | 0.00 |
| 279 | 85.993 .. 86.302 | 1 | 0.00 |
| 300 | 92.466 .. 92.774 | 1 | 0.00 |
| 309 | 95.240 .. 95.548 | 1 | 0.00 |
| 339 | 104.487 .. 104.795 | 1 | 0.00 |
| 411 | 126.679 .. 126.987 | 1 | 0.00 |
| 434 | 133.768 .. 134.076 | 1 | 0.00 |
| 463 | 142.706 .. 143.014 | 1 | 0.00 |
| 532 | 163.973 .. 164.281 | 1 | 0.00 |
| 537 | 165.514 .. 165.823 | 1 | 0.00 |
| 538 | 165.823 .. 166.131 | 1 | 0.00 |
| 631 | 194.487 .. 194.795 | 1 | 0.00 |
| 634 | 195.412 .. 195.720 | 1 | 0.00 |
| 707 | 217.912 .. 218.220 | 1 | 0.00 |
| 728 | 224.384 .. 224.693 | 1 | 0.00 |
| 735 | 226.542 .. 226.850 | 1 | 0.00 |
| 772 | 237.946 .. 238.254 | 1 | 0.00 |
| 924 | 284.796 .. 285.104 | 1 | 0.00 |
| 1346 | 414.865 .. 415.173 | 1 | 0.00 |
| 1360 | 419.180 .. 419.488 | 1 | 0.00 |
+------+---------------------+-------------------+--------+
| ---- | | 352959 | 100.00 |
+------+---------------------+-------------------+--------+