On Mon, Jun 25, 2018 at 11:40:12AM +0200, Igor Mammedov wrote: > On Fri, 22 Jun 2018 22:22:05 +0300 > "Michael S. Tsirkin" <mst@xxxxxxxxxx> wrote: > > > With this flag, kvm allows guest to control host CPU power state. This > > increases latency for other processes using same host CPU in an > > unpredictable way, but it decreases idle entry/exit times for the > > running VCPU, so to use it QEMU needs a hint about whether host CPU is > > overcommitted, hence the flag name. > > > > Follow-up patches will expose this capability to guest > > (using mwait leaf). > > > > Based on a patch by Wanpeng Li <kernellwp@xxxxxxxxx> . > > > > Signed-off-by: Michael S. Tsirkin <mst@xxxxxxxxxx> > > --- > > include/sysemu/sysemu.h | 1 + > > target/i386/kvm.c | 23 +++++++++++++++++++++++ > > vl.c | 32 +++++++++++++++++++++++++++++++- > > qemu-options.hx | 27 +++++++++++++++++++++++++-- > > 4 files changed, 80 insertions(+), 3 deletions(-) > > > > diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h > > index e893f72f3b..b921c6f3b7 100644 > > --- a/include/sysemu/sysemu.h > > +++ b/include/sysemu/sysemu.h > > @@ -128,6 +128,7 @@ extern bool boot_strict; > > extern uint8_t *boot_splash_filedata; > > extern size_t boot_splash_filedata_size; > > extern bool enable_mlock; > > +extern bool enable_cpu_pm; > > extern uint8_t qemu_extra_params_fw[2]; > > extern QEMUClockType rtc_clock; > > extern const char *mem_path; > > diff --git a/target/i386/kvm.c b/target/i386/kvm.c > > index 44f70733e7..cf9107be4b 100644 > > --- a/target/i386/kvm.c > > +++ b/target/i386/kvm.c > > @@ -1357,6 +1357,29 @@ int kvm_arch_init(MachineState *ms, KVMState *s) > > smram_machine_done.notify = register_smram_listener; > > qemu_add_machine_init_done_notifier(&smram_machine_done); > > } > > + > > + if (enable_cpu_pm) { > > + int disable_exits = kvm_check_extension(s, KVM_CAP_X86_DISABLE_EXITS); > > + int ret; > > + > > +/* Work around for kernel header with a typo. TODO: fix header and drop. 
*/ > > +#if defined(KVM_X86_DISABLE_EXITS_HTL) && !defined(KVM_X86_DISABLE_EXITS_HLT) > > +#define KVM_X86_DISABLE_EXITS_HLT KVM_X86_DISABLE_EXITS_HTL > > +#endif > > + if (disable_exits) { > > + disable_exits &= (KVM_X86_DISABLE_EXITS_MWAIT | > > + KVM_X86_DISABLE_EXITS_HLT | > > + KVM_X86_DISABLE_EXITS_PAUSE); > > + } > > + > > + ret = kvm_vm_enable_cap(s, KVM_CAP_X86_DISABLE_EXITS, 0, > > + disable_exits); > > + if (ret < 0) { > > + error_report("kvm: guest stopping CPU not supported: %s", > > + strerror(-ret)); > > + } > > + } > > + > > return 0; > > } > > > > diff --git a/vl.c b/vl.c > > index 06031715ac..c9530efed5 100644 > > --- a/vl.c > > +++ b/vl.c > > @@ -142,6 +142,7 @@ ram_addr_t ram_size; > > const char *mem_path = NULL; > > int mem_prealloc = 0; /* force preallocation of physical target memory */ > > bool enable_mlock = false; > > +bool enable_cpu_pm = false; > > int nb_nics; > > NICInfo nd_table[MAX_NICS]; > > int autostart; > > @@ -390,6 +391,22 @@ static QemuOptsList qemu_realtime_opts = { > > }, > > }; > > > > +static QemuOptsList qemu_overcommit_opts = { > > + .name = "overcommit", > > + .head = QTAILQ_HEAD_INITIALIZER(qemu_overcommit_opts.head), > > + .desc = { > > + { > > + .name = "mem-lock", > > + .type = QEMU_OPT_BOOL, > > + }, > > + { > > + .name = "cpu-pm", > > + .type = QEMU_OPT_BOOL, > > + }, > > + { /* end of list */ } > > + }, > > +}; > > + > > static QemuOptsList qemu_msg_opts = { > > .name = "msg", > > .head = QTAILQ_HEAD_INITIALIZER(qemu_msg_opts.head), > > @@ -3903,7 +3920,20 @@ int main(int argc, char **argv, char **envp) > > if (!opts) { > > exit(1); > > } > > - enable_mlock = qemu_opt_get_bool(opts, "mlock", true); > > + /* Don't override the -overcommit option if set */ > > + enable_mlock = enable_mlock || > > + qemu_opt_get_bool(opts, "mlock", true); > > + break; > > + case QEMU_OPTION_overcommit: > > + opts = qemu_opts_parse_noisily(qemu_find_opts("overcommit"), > > + optarg, false); > > + if (!opts) { > > + exit(1); > > + } 
> > + /* Don't override the -realtime option if set */ > > + enable_mlock = enable_mlock || > > + qemu_opt_get_bool(opts, "mem-lock", false); > > + enable_cpu_pm = qemu_opt_get_bool(opts, "cpu-pm", false); > should we error out or complain if the option can't be used > /i.e. in case of non kvm accelerator/ > instead of silently ignoring it > and making user wonder why it doesn't work? Well it also only applies with -cpu host right now. And guest works fine, even if it's somewhat slower. As there's no interface to discover which configurations work, I *suspect* it's easier for management if we keep guest running rather than fail and make it guess. > > break; > > case QEMU_OPTION_msg: > > opts = qemu_opts_parse_noisily(qemu_find_opts("msg"), optarg, > > diff --git a/qemu-options.hx b/qemu-options.hx > > index c0d3951e9f..1bba3d258b 100644 > > --- a/qemu-options.hx > > +++ b/qemu-options.hx > > @@ -3328,8 +3328,7 @@ DEF("realtime", HAS_ARG, QEMU_OPTION_realtime, > > "-realtime [mlock=on|off]\n" > > " run qemu with realtime features\n" > > " mlock=on|off controls mlock support (default: on)\n", > > - QEMU_ARCH_ALL) > > -STEXI > > + QEMU_ARCH_ALL) STEXI > > @item -realtime mlock=on|off > > @findex -realtime > > Run qemu with realtime features. > > @@ -3337,6 +3336,30 @@ mlocking qemu and guest memory can be enabled via @option{mlock=on} > > (enabled by default). > > ETEXI > > > > +DEF("overcommit", HAS_ARG, QEMU_OPTION_overcommit, > > + "--overcommit [mem-lock=on|off][cpu-pm=on|off]\n" > > + " run qemu with overcommit hints\n" > > + " mem-lock=on|off controls memory lock support (default: off)\n" > > + " cpu-pm=on|off controls cpu power management (default: off)\n", > > + QEMU_ARCH_ALL) > > +STEXI > > +@item -overcommit mem-lock=on|off > > +@item -overcommit cpu-pm=on|off > > +@findex -overcommit > > +Run qemu with hints about host resource overcommit. The default is > > +to assume that host overcommits all resources. 
> > + > > +Locking qemu and guest memory can be enabled via @option{mem-lock=on} (disabled > > +by default). This works when host memory is not overcommitted and reduces the > > +worst-case latency for guest. This is equivalent to @option{realtime}. > > + > > +Guest ability to manage power state of host cpus (increasing latency for other > > +processes on the same host cpu, but decreasing latency for guest) can be > > +enabled via @option{cpu-pm=on} (disabled by default). This works best when > > +host CPU is not overcommitted. When used, host estimates of CPU cycle and power > > +utilization will be incorrect, not taking into account guest idle time. > > +ETEXI > > + > > DEF("gdb", HAS_ARG, QEMU_OPTION_gdb, \ > > "-gdb dev wait for gdb connection on 'dev'\n", QEMU_ARCH_ALL) > > STEXI