On 01/06/2011 11:56 AM, Marcelo Tosatti wrote:
From: Jan Kiszka<jan.kiszka@xxxxxxxxxxx> If kvmclock is used, which implies the kernel supports it, register a kvmclock device with the sysbus. Its main purpose is to save and restore the kernel state on migration, but this will also allow to visualize it one day. Signed-off-by: Jan Kiszka<jan.kiszka@xxxxxxxxxxx> CC: Glauber Costa<glommer@xxxxxxxxxx> Signed-off-by: Marcelo Tosatti<mtosatti@xxxxxxxxxx> --- target-i386/kvm.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 files changed, 91 insertions(+), 1 deletions(-) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 69b8234..47cb22b 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -29,6 +29,7 @@ #include "hw/apic.h" #include "ioport.h" #include "kvm_x86.h" +#include "hw/sysbus.h" #ifdef CONFIG_KVM_PARA #include<linux/kvm_para.h> @@ -309,6 +310,85 @@ void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, #endif } +#if defined(CONFIG_KVM_PARA)&& defined(KVM_CAP_ADJUST_CLOCK) +typedef struct KVMClockState { + SysBusDevice busdev; + uint64_t clock; + bool clock_valid; +} KVMClockState; + +static void kvmclock_pre_save(void *opaque) +{ + KVMClockState *s = opaque; + struct kvm_clock_data data; + int ret; + + if (s->clock_valid) { + return; + } + ret = kvm_vm_ioctl(KVM_GET_CLOCK,&data); + if (ret< 0) { + fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret)); + data.clock = 0; + } + s->clock = data.clock; + /* + * If the VM is stopped, declare the clock state valid to avoid re-reading + * it on next vmsave (which would return a different value). Will be reset + * when the VM is continued. + */ + s->clock_valid = !vm_running; +} + +static int kvmclock_post_load(void *opaque, int version_id) +{ + KVMClockState *s = opaque; + struct kvm_clock_data data; + + data.clock = s->clock; + data.flags = 0; + return kvm_vm_ioctl(KVM_SET_CLOCK,&data); +} + +static void kvmclock_vm_state_change(void *opaque, int running, int reason) +{ + KVMClockState *s = opaque; + + if (running) { + s->clock_valid = false; + } +} + +static int kvmclock_init(SysBusDevice *dev) +{ + KVMClockState *s = FROM_SYSBUS(KVMClockState, dev); + + qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s); + return 0; +} + +static const VMStateDescription kvmclock_vmsd= { + .name = "kvmclock", + .version_id = 1, + .minimum_version_id = 1, + .minimum_version_id_old = 1, + .pre_save = kvmclock_pre_save, + .post_load = kvmclock_post_load, + .fields = (VMStateField []) { + VMSTATE_UINT64(clock, KVMClockState), + VMSTATE_END_OF_LIST() + } +}; + +static SysBusDeviceInfo kvmclock_info = { + .qdev.name = "kvmclock", + .qdev.size = sizeof(KVMClockState), + .qdev.vmsd =&kvmclock_vmsd, + .qdev.no_user = 1, + .init = kvmclock_init, +}; +#endif /* CONFIG_KVM_PARA&& KVM_CAP_ADJUST_CLOCK */ + int kvm_arch_init_vcpu(CPUState *env) { struct { @@ -335,7 +415,6 @@ int kvm_arch_init_vcpu(CPUState *env) env->cpuid_svm_features&= kvm_x86_get_supported_cpuid(0x8000000A, 0, R_EDX); - cpuid_i = 0; #ifdef CONFIG_KVM_PARA @@ -442,6 +521,13 @@ int kvm_arch_init_vcpu(CPUState *env) } #endif +#if defined(CONFIG_KVM_PARA)&& defined(KVM_CAP_ADJUST_CLOCK) + if (cpu_is_bsp(env)&& + (env->cpuid_kvm_features& (1ULL<< KVM_FEATURE_CLOCKSOURCE))) { + sysbus_create_simple("kvmclock", -1, NULL); + } +#endif + return kvm_vcpu_ioctl(env, KVM_SET_CPUID2,&cpuid_data); } @@ -531,6 +617,10 @@ int kvm_arch_init(int smp_cpus) int ret; struct utsname utsname; +#if defined(CONFIG_KVM_PARA)&& defined(KVM_CAP_ADJUST_CLOCK) + sysbus_register_withprop(&kvmclock_info); +#endif + ret = kvm_get_supported_msrs(); if (ret< 0) { return ret;
There are a couple things wrong with this patch. It breaks compatibility because it does not allow kvmclock to be created or initiated in machines. Older machines didn't expose kvmclock but now they do. It also makes it impossible to pass parameters to kvmclock in the future because the device creation is hidden deep in other code paths. Calling any qdev creation function in anything but pc.c (or the equivalent) should be a big red flag.
The solution is simple, introduce as kvm_has_clocksource(). Within the machine init, create the the kvm clock device after CPU creation wrapped in a if (kvm_has_clocksource()) call. kvmclock should be created with kvm_state as a parameter and kvm_vm_ioctl() is passed the stored reference. Taking a global reference to kvm_state in machine_init is not a bad thing, obviously the machine initialization function needs access to the kvm_state.
Regards, Anthony Liguori -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html