On Wed, Sep 2, 2015 at 12:29 AM, Wanpeng Li <wanpeng.li@xxxxxxxxxxx> wrote: > There is a downside of always-poll since poll is still happened for idle > vCPUs which can waste cpu usage. This patch adds the ability to adjust > halt_poll_ns dynamically, to grow halt_poll_ns when shot halt is detected, > and to shrink halt_poll_ns when long halt is detected. > > There are two new kernel parameters for changing the halt_poll_ns: > halt_poll_ns_grow and halt_poll_ns_shrink. > > no-poll always-poll dynamic-poll > ----------------------------------------------------------------------- > Idle (nohz) vCPU %c0 0.15% 0.3% 0.2% > Idle (250HZ) vCPU %c0 1.1% 4.6%~14% 1.2% > TCP_RR latency 34us 27us 26.7us > > "Idle (X) vCPU %c0" is the percent of time the physical cpu spent in > c0 over 60 seconds (each vCPU is pinned to a pCPU). (nohz) means the > guest was tickless. (250HZ) means the guest was ticking at 250HZ. > > The big win is with ticking operating systems. Running the linux guest > with nohz=off (and HZ=250), we save 3.4%~12.8% CPUs/second and get close > to no-polling overhead levels by using the dynamic-poll. The savings > should be even higher for higher frequency ticks. > > Suggested-by: David Matlack <dmatlack@xxxxxxxxxx> > Signed-off-by: Wanpeng Li <wanpeng.li@xxxxxxxxxxx> > --- > virt/kvm/kvm_main.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++--- > 1 file changed, 58 insertions(+), 3 deletions(-) > > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c > index c06e57c..3cff02f 100644 > --- a/virt/kvm/kvm_main.c > +++ b/virt/kvm/kvm_main.c > @@ -66,9 +66,18 @@ > MODULE_AUTHOR("Qumranet"); > MODULE_LICENSE("GPL"); > > -static unsigned int halt_poll_ns; > +/* halt polling only reduces halt latency by 5-7 us, 500us is enough */ > +static unsigned int halt_poll_ns = 500000; > module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR); > > +/* Default doubles per-vcpu halt_poll_ns. */ > +static unsigned int halt_poll_ns_grow = 2; > +module_param(halt_poll_ns_grow, int, S_IRUGO); > + > +/* Default resets per-vcpu halt_poll_ns . */ > +static unsigned int halt_poll_ns_shrink; > +module_param(halt_poll_ns_shrink, int, S_IRUGO); > + > /* > * Ordering of locks: > * > @@ -1907,6 +1916,31 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn) > } > EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty); > > +static void grow_halt_poll_ns(struct kvm_vcpu *vcpu) > +{ > + int val = vcpu->halt_poll_ns; > + > + /* 10us base */ > + if (val == 0 && halt_poll_ns_grow) > + val = 10000; > + else > + val *= halt_poll_ns_grow; > + > + vcpu->halt_poll_ns = val; > +} > + > +static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu) > +{ > + int val = vcpu->halt_poll_ns; > + > + if (halt_poll_ns_shrink == 0) > + val = 0; > + else > + val /= halt_poll_ns_shrink; > + > + vcpu->halt_poll_ns = val; > +} > + > static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu) > { > if (kvm_arch_vcpu_runnable(vcpu)) { > @@ -1929,6 +1963,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) > ktime_t start, cur; > DEFINE_WAIT(wait); > bool waited = false; > + u64 poll_ns = 0, wait_ns = 0, block_ns = 0; > > start = cur = ktime_get(); > if (vcpu->halt_poll_ns) { > @@ -1941,10 +1976,15 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) > */ > if (kvm_vcpu_check_block(vcpu) < 0) { > ++vcpu->stat.halt_successful_poll; > - goto out; > + break; > } > cur = ktime_get(); > } while (single_task_running() && ktime_before(cur, stop)); > + > + if (ktime_before(cur, stop)) { You can't use 'cur' to tell if the interrupt arrived. single_task_running() can break us out of the loop before 'stop'. > + poll_ns = ktime_to_ns(cur) - ktime_to_ns(start); Put this line before the if(). block_ns should always include the time spent polling; even if polling does not succeed. > + goto out; > + } > } > > for (;;) { > @@ -1959,9 +1999,24 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) > > finish_wait(&vcpu->wq, &wait); > cur = ktime_get(); > + wait_ns = ktime_to_ns(cur) - ktime_to_ns(start); > > out: > - trace_kvm_vcpu_wakeup(ktime_to_ns(cur) - ktime_to_ns(start), waited); > + block_ns = poll_ns + wait_ns; > + > + if (halt_poll_ns) { If you want, you can leave this if() out and save some indentation. > + if (block_ns <= vcpu->halt_poll_ns) > + ; > + /* we had a long block, shrink polling */ > + else if (vcpu->halt_poll_ns && block_ns > halt_poll_ns) > + shrink_halt_poll_ns(vcpu); > + /* we had a short halt and our poll time is too small */ > + else if (vcpu->halt_poll_ns < halt_poll_ns && > + block_ns < halt_poll_ns) > + grow_halt_poll_ns(vcpu); > + } > + > + trace_kvm_vcpu_wakeup(block_ns, waited); > } > EXPORT_SYMBOL_GPL(kvm_vcpu_block); > > -- > 1.9.1 > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html