[PATCH 14/21] qemu-kvm: Rework VCPU state writeback API

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This grand cleanup drops all reset and vmsave/load related
synchronization points in favor of four(!) generic hooks:

- cpu_synchronize_all_states in qemu_savevm_state_complete
  (initial sync from kernel before vmsave)
- cpu_synchronize_all_post_init in qemu_loadvm_state
  (writeback after vmload)
- cpu_synchronize_all_post_init in main after machine init
- cpu_synchronize_all_post_reset in qemu_system_reset
  (writeback after system reset)

These writeback points + the existing one of VCPU exec after
cpu_synchronize_state map on three levels of writeback:

- KVM_PUT_ASYNC_STATE (during runtime, other VCPUs continue to run)
- KVM_PUT_RESET_STATE (on synchronous system reset, all VCPUs stopped)
- KVM_PUT_FULL_STATE  (on init or vmload, all VCPUs stopped as well)

This level is passed to the arch-specific VCPU state writing function
that will decide which concrete substates need to be written. That way,
no writer of load, save or reset functions that interact with in-kernel
KVM states will ever have to worry about synchronization again. That
also means that a lot of reasons for races, segfaults and deadlocks are
eliminated.

cpu_synchronize_state remains untouched, just as Anthony suggested. We
continue to need it before reading or writing of VCPU states that are
also tracked by in-kernel KVM subsystems.

Consequently, this patch removes many cpu_synchronize_state calls that
are now redundant, just like remaining explicit register syncs. It does
not touch qemu-kvm's special hooks for mpstate, vcpu_events, or tsc
loading. They will be cleaned up by individual patches.

Signed-off-by: Jan Kiszka <jan.kiszka@xxxxxxxxxxx>
---
 exec.c                |   17 -----------------
 hw/apic.c             |    3 ---
 hw/pc.c               |    1 -
 hw/ppc_newworld.c     |    3 ---
 hw/ppc_oldworld.c     |    3 ---
 hw/s390-virtio.c      |    1 -
 kvm-all.c             |   19 +++++++++++++------
 kvm.h                 |   22 +++++++++++++++++++++-
 qemu-kvm-ia64.c       |    2 +-
 qemu-kvm-x86.c        |    3 +--
 qemu-kvm.c            |   16 +++++++++++++---
 qemu-kvm.h            |    2 +-
 savevm.c              |    4 ++++
 sysemu.h              |    4 ++++
 target-i386/kvm.c     |    2 +-
 target-i386/machine.c |   10 ----------
 target-ia64/machine.c |    2 --
 target-ppc/kvm.c      |    2 +-
 target-ppc/machine.c  |    4 ----
 target-s390x/kvm.c    |    3 +--
 vl.c                  |   29 +++++++++++++++++++++++++++++
 21 files changed, 90 insertions(+), 62 deletions(-)

diff --git a/exec.c b/exec.c
index ade09cb..7b35e0f 100644
--- a/exec.c
+++ b/exec.c
@@ -529,21 +529,6 @@ void cpu_exec_init_all(unsigned long tb_size)
 
 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
 
-static void cpu_common_pre_save(void *opaque)
-{
-    CPUState *env = opaque;
-
-    cpu_synchronize_state(env);
-}
-
-static int cpu_common_pre_load(void *opaque)
-{
-    CPUState *env = opaque;
-
-    cpu_synchronize_state(env);
-    return 0;
-}
-
 static int cpu_common_post_load(void *opaque, int version_id)
 {
     CPUState *env = opaque;
@@ -561,8 +546,6 @@ static const VMStateDescription vmstate_cpu_common = {
     .version_id = 1,
     .minimum_version_id = 1,
     .minimum_version_id_old = 1,
-    .pre_save = cpu_common_pre_save,
-    .pre_load = cpu_common_pre_load,
     .post_load = cpu_common_post_load,
     .fields      = (VMStateField []) {
         VMSTATE_UINT32(halted, CPUState),
diff --git a/hw/apic.c b/hw/apic.c
index ae805dc..3e03e10 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -488,7 +488,6 @@ void apic_init_reset(CPUState *env)
     if (!s)
         return;
 
-    cpu_synchronize_state(env);
     s->tpr = 0;
     s->spurious_vec = 0xff;
     s->log_dest = 0;
@@ -1070,8 +1069,6 @@ static void apic_reset(void *opaque)
     APICState *s = opaque;
     int bsp;
 
-    cpu_synchronize_state(s->cpu_env);
-
     bsp = cpu_is_bsp(s->cpu_env);
     s->apicbase = 0xfee00000 |
         (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE;
diff --git a/hw/pc.c b/hw/pc.c
index af6ea8b..6c15a9f 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -744,7 +744,6 @@ CPUState *pc_new_cpu(const char *cpu_model)
         fprintf(stderr, "Unable to find x86 CPU definition\n");
         exit(1);
     }
-    env->kvm_vcpu_dirty = 1;
     if ((env->cpuid_features & CPUID_APIC) || smp_cpus > 1) {
         env->cpuid_apic_id = env->cpu_index;
         /* APIC reset callback resets cpu */
diff --git a/hw/ppc_newworld.c b/hw/ppc_newworld.c
index a4c714a..9e288bd 100644
--- a/hw/ppc_newworld.c
+++ b/hw/ppc_newworld.c
@@ -139,9 +139,6 @@ static void ppc_core99_init (ram_addr_t ram_size,
         envs[i] = env;
     }
 
-    /* Make sure all register sets take effect */
-    cpu_synchronize_state(env);
-
     /* allocate RAM */
     ram_offset = qemu_ram_alloc(ram_size);
     cpu_register_physical_memory(0, ram_size, ram_offset);
diff --git a/hw/ppc_oldworld.c b/hw/ppc_oldworld.c
index 7ccc6a1..1aa05ed 100644
--- a/hw/ppc_oldworld.c
+++ b/hw/ppc_oldworld.c
@@ -164,9 +164,6 @@ static void ppc_heathrow_init (ram_addr_t ram_size,
         envs[i] = env;
     }
 
-    /* Make sure all register sets take effect */
-    cpu_synchronize_state(env);
-
     /* allocate RAM */
     if (ram_size > (2047 << 20)) {
         fprintf(stderr,
diff --git a/hw/s390-virtio.c b/hw/s390-virtio.c
index 3582728..ad3386f 100644
--- a/hw/s390-virtio.c
+++ b/hw/s390-virtio.c
@@ -185,7 +185,6 @@ static void s390_init(ram_addr_t ram_size,
             exit(1);
         }
 
-        cpu_synchronize_state(env);
         env->psw.addr = KERN_IMAGE_START;
         env->psw.mask = 0x0000000180000000ULL;
     }
diff --git a/kvm-all.c b/kvm-all.c
index f3cfa2c..8fc89c1 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -152,10 +152,6 @@ static void kvm_reset_vcpu(void *opaque)
     CPUState *env = opaque;
 
     kvm_arch_reset_vcpu(env);
-    if (kvm_arch_put_registers(env)) {
-        fprintf(stderr, "Fatal: kvm vcpu reset failed\n");
-        abort();
-    }
 }
 #endif
 
@@ -206,7 +202,6 @@ int kvm_init_vcpu(CPUState *env)
     if (ret == 0) {
         qemu_register_reset(kvm_reset_vcpu, env);
         kvm_arch_reset_vcpu(env);
-        ret = kvm_arch_put_registers(env);
     }
 err:
     return ret;
@@ -579,6 +574,18 @@ void kvm_cpu_synchronize_state(CPUState *env)
     }
 }
 
+void kvm_cpu_synchronize_post_reset(CPUState *env)
+{
+    kvm_arch_put_registers(env, KVM_PUT_RESET_STATE);
+    env->kvm_vcpu_dirty = 0;
+}
+
+void kvm_cpu_synchronize_post_init(CPUState *env)
+{
+    kvm_arch_put_registers(env, KVM_PUT_FULL_STATE);
+    env->kvm_vcpu_dirty = 0;
+}
+
 int kvm_cpu_exec(CPUState *env)
 {
     struct kvm_run *run = env->kvm_run;
@@ -594,7 +601,7 @@ int kvm_cpu_exec(CPUState *env)
         }
 
         if (env->kvm_vcpu_dirty) {
-            kvm_arch_put_registers(env);
+            kvm_arch_put_registers(env, KVM_PUT_ASYNC_STATE);
             env->kvm_vcpu_dirty = 0;
         }
 
diff --git a/kvm.h b/kvm.h
index 740fd1a..ee8b3f6 100644
--- a/kvm.h
+++ b/kvm.h
@@ -92,7 +92,11 @@ int kvm_arch_pre_run(CPUState *env, struct kvm_run *run);
 
 int kvm_arch_get_registers(CPUState *env);
 
-int kvm_arch_put_registers(CPUState *env);
+#define KVM_PUT_ASYNC_STATE     1
+#define KVM_PUT_RESET_STATE     2
+#define KVM_PUT_FULL_STATE      3
+
+int kvm_arch_put_registers(CPUState *env, int level);
 
 int kvm_arch_init(KVMState *s, int smp_cpus);
 
@@ -136,6 +140,8 @@ int kvm_check_extension(KVMState *s, unsigned int extension);
 uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function,
                                       int reg);
 void kvm_cpu_synchronize_state(CPUState *env);
+void kvm_cpu_synchronize_post_reset(CPUState *env);
+void kvm_cpu_synchronize_post_init(CPUState *env);
 
 /* generic hooks - to be moved/refactored once there are more users */
 
@@ -146,4 +152,18 @@ static inline void cpu_synchronize_state(CPUState *env)
     }
 }
 
+static inline void cpu_synchronize_post_reset(CPUState *env)
+{
+    if (kvm_enabled()) {
+        kvm_cpu_synchronize_post_reset(env);
+    }
+}
+
+static inline void cpu_synchronize_post_init(CPUState *env)
+{
+    if (kvm_enabled()) {
+        kvm_cpu_synchronize_post_init(env);
+    }
+}
+
 #endif
diff --git a/qemu-kvm-ia64.c b/qemu-kvm-ia64.c
index a11fde8..fc8110e 100644
--- a/qemu-kvm-ia64.c
+++ b/qemu-kvm-ia64.c
@@ -16,7 +16,7 @@ int kvm_arch_qemu_create_context(void)
     return 0;
 }
 
-void kvm_arch_load_regs(CPUState *env)
+void kvm_arch_load_regs(CPUState *env, int level)
 {
 }
 
diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
index 834e9c1..63cd095 100644
--- a/qemu-kvm-x86.c
+++ b/qemu-kvm-x86.c
@@ -803,7 +803,7 @@ static void get_seg(SegmentCache *lhs, const struct kvm_segment *rhs)
 	| (rhs->avl * DESC_AVL_MASK);
 }
 
-void kvm_arch_load_regs(CPUState *env)
+void kvm_arch_load_regs(CPUState *env, int level)
 {
     struct kvm_regs regs;
     struct kvm_fpu fpu;
@@ -1365,7 +1365,6 @@ void kvm_arch_push_nmi(void *opaque)
 void kvm_arch_cpu_reset(CPUState *env)
 {
     kvm_arch_reset_vcpu(env);
-    kvm_arch_load_regs(env);
     kvm_put_vcpu_events(env);
     if (!cpu_is_bsp(env)) {
 	if (kvm_irqchip_in_kernel()) {
diff --git a/qemu-kvm.c b/qemu-kvm.c
index a220353..53030f1 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -862,7 +862,7 @@ int pre_kvm_run(kvm_context_t kvm, CPUState *env)
     kvm_arch_pre_run(env, env->kvm_run);
 
     if (env->kvm_vcpu_dirty) {
-        kvm_arch_load_regs(env);
+        kvm_arch_load_regs(env, KVM_PUT_ASYNC_STATE);
         env->kvm_vcpu_dirty = 0;
     }
 
@@ -1535,6 +1535,18 @@ void kvm_cpu_synchronize_state(CPUState *env)
     }
 }
 
+void kvm_cpu_synchronize_post_reset(CPUState *env)
+{
+    kvm_arch_load_regs(env, KVM_PUT_RESET_STATE);
+    env->kvm_vcpu_dirty = 0;
+}
+
+void kvm_cpu_synchronize_post_init(CPUState *env)
+{
+    kvm_arch_load_regs(env, KVM_PUT_FULL_STATE);
+    env->kvm_vcpu_dirty = 0;
+}
+
 static void inject_interrupt(void *data)
 {
     cpu_interrupt(current_env, (long) data);
@@ -1880,8 +1892,6 @@ static void *ap_main_loop(void *_env)
 
     kvm_arch_init_vcpu(env);
 
-    kvm_arch_load_regs(env);
-
     /* signal VCPU creation */
     current_env->created = 1;
     pthread_cond_signal(&qemu_vcpu_cond);
diff --git a/qemu-kvm.h b/qemu-kvm.h
index e8c3a58..6d785a0 100644
--- a/qemu-kvm.h
+++ b/qemu-kvm.h
@@ -908,7 +908,7 @@ int kvm_qemu_destroy_memory_alias(uint64_t phys_start);
 int kvm_arch_qemu_create_context(void);
 
 void kvm_arch_save_regs(CPUState *env);
-void kvm_arch_load_regs(CPUState *env);
+void kvm_arch_load_regs(CPUState *env, int level);
 void kvm_arch_load_mpstate(CPUState *env);
 void kvm_arch_save_mpstate(CPUState *env);
 int kvm_arch_has_work(CPUState *env);
diff --git a/savevm.c b/savevm.c
index 2fd3de6..f1e675c 100644
--- a/savevm.c
+++ b/savevm.c
@@ -1368,6 +1368,8 @@ int qemu_savevm_state_complete(Monitor *mon, QEMUFile *f)
 {
     SaveStateEntry *se;
 
+    cpu_synchronize_all_states();
+
     QTAILQ_FOREACH(se, &savevm_handlers, entry) {
         if (se->save_live_state == NULL)
             continue;
@@ -1568,6 +1570,8 @@ int qemu_loadvm_state(QEMUFile *f)
         }
     }
 
+    cpu_synchronize_all_post_init();
+
     ret = 0;
 
 out:
diff --git a/sysemu.h b/sysemu.h
index ff97786..f92b94a 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -59,6 +59,10 @@ int load_vmstate(Monitor *mon, const char *name);
 void do_delvm(Monitor *mon, const QDict *qdict);
 void do_info_snapshots(Monitor *mon);
 
+void cpu_synchronize_all_states(void);
+void cpu_synchronize_all_post_reset(void);
+void cpu_synchronize_all_post_init(void);
+
 void qemu_announce_self(void);
 
 void main_loop_wait(int timeout);
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index e1ae15d..4a0c8bb 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -857,7 +857,7 @@ int kvm_get_vcpu_events(CPUState *env)
 }
 
 #ifdef KVM_UPSTREAM
-int kvm_arch_put_registers(CPUState *env)
+int kvm_arch_put_registers(CPUState *env, int level)
 {
     int ret;
 
diff --git a/target-i386/machine.c b/target-i386/machine.c
index 0b8a33a..bebde82 100644
--- a/target-i386/machine.c
+++ b/target-i386/machine.c
@@ -322,7 +322,6 @@ static void cpu_pre_save(void *opaque)
     CPUState *env = opaque;
     int i;
 
-    cpu_synchronize_state(env);
     if (kvm_enabled()) {
         kvm_save_mpstate(env);
         kvm_get_vcpu_events(env);
@@ -342,14 +341,6 @@ static void cpu_pre_save(void *opaque)
 #endif
 }
 
-static int cpu_pre_load(void *opaque)
-{
-    CPUState *env = opaque;
-
-    cpu_synchronize_state(env);
-    return 0;
-}
-
 static int cpu_post_load(void *opaque, int version_id)
 {
     CPUState *env = opaque;
@@ -389,7 +380,6 @@ static const VMStateDescription vmstate_cpu = {
     .minimum_version_id = 3,
     .minimum_version_id_old = 3,
     .pre_save = cpu_pre_save,
-    .pre_load = cpu_pre_load,
     .post_load = cpu_post_load,
     .fields      = (VMStateField []) {
         VMSTATE_UINTTL_ARRAY(regs, CPUState, CPU_NB_REGS),
diff --git a/target-ia64/machine.c b/target-ia64/machine.c
index 7d29575..fdbeeef 100644
--- a/target-ia64/machine.c
+++ b/target-ia64/machine.c
@@ -9,7 +9,6 @@ void cpu_save(QEMUFile *f, void *opaque)
     CPUState *env = opaque;
 
     if (kvm_enabled()) {
-        kvm_arch_save_regs(env);
         kvm_arch_save_mpstate(env);
     }
 }
@@ -19,7 +18,6 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id)
     CPUState *env = opaque;
 
     if (kvm_enabled()) {
-        kvm_arch_load_regs(env);
         kvm_arch_load_mpstate(env);
     }
     return 0;
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 0424a78..debe441 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -57,7 +57,7 @@ void kvm_arch_reset_vcpu(CPUState *env)
 {
 }
 
-int kvm_arch_put_registers(CPUState *env)
+int kvm_arch_put_registers(CPUState *env, int level)
 {
     struct kvm_regs regs;
     int ret;
diff --git a/target-ppc/machine.c b/target-ppc/machine.c
index ead38e1..81d3f72 100644
--- a/target-ppc/machine.c
+++ b/target-ppc/machine.c
@@ -8,8 +8,6 @@ void cpu_save(QEMUFile *f, void *opaque)
     CPUState *env = (CPUState *)opaque;
     unsigned int i, j;
 
-    cpu_synchronize_state(env);
-
     for (i = 0; i < 32; i++)
         qemu_put_betls(f, &env->gpr[i]);
 #if !defined(TARGET_PPC64)
@@ -97,8 +95,6 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id)
     CPUState *env = (CPUState *)opaque;
     unsigned int i, j;
 
-    cpu_synchronize_state(env);
-
     for (i = 0; i < 32; i++)
         qemu_get_betls(f, &env->gpr[i]);
 #if !defined(TARGET_PPC64)
diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c
index 0992563..59b8a17 100644
--- a/target-s390x/kvm.c
+++ b/target-s390x/kvm.c
@@ -91,7 +91,7 @@ void kvm_arch_reset_vcpu(CPUState *env)
     /* FIXME: add code to reset vcpu. */
 }
 
-int kvm_arch_put_registers(CPUState *env)
+int kvm_arch_put_registers(CPUState *env, int level)
 {
     struct kvm_regs regs;
     int ret;
@@ -296,7 +296,6 @@ static int handle_hypercall(CPUState *env, struct kvm_run *run)
 
     cpu_synchronize_state(env);
     r = s390_virtio_hypercall(env);
-    kvm_arch_put_registers(env);
 
     return r;
 }
diff --git a/vl.c b/vl.c
index 25af448..74f2a6f 100644
--- a/vl.c
+++ b/vl.c
@@ -3047,6 +3047,33 @@ static void nographic_update(void *opaque)
     qemu_mod_timer(nographic_timer, interval + qemu_get_clock(rt_clock));
 }
 
+void cpu_synchronize_all_states(void)
+{
+    CPUState *cpu;
+
+    for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
+        cpu_synchronize_state(cpu);
+    }
+}
+
+void cpu_synchronize_all_post_reset(void)
+{
+    CPUState *cpu;
+
+    for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
+        cpu_synchronize_post_reset(cpu);
+    }
+}
+
+void cpu_synchronize_all_post_init(void)
+{
+    CPUState *cpu;
+
+    for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
+        cpu_synchronize_post_init(cpu);
+    }
+}
+
 struct vm_change_state_entry {
     VMChangeStateHandler *cb;
     void *opaque;
@@ -3195,6 +3222,7 @@ void qemu_system_reset(void)
     QTAILQ_FOREACH_SAFE(re, &reset_handlers, entry, nre) {
         re->func(re->opaque);
     }
+    cpu_synchronize_all_post_reset();
 }
 
 void qemu_system_reset_request(void)
@@ -5956,6 +5984,7 @@ int main(int argc, char **argv, char **envp)
     machine->init(ram_size, boot_devices,
                   kernel_filename, kernel_cmdline, initrd_filename, cpu_model);
 
+    cpu_synchronize_all_post_init();
 
 #ifndef _WIN32
     /* must be after terminal init, SDL library changes signal handlers */
-- 
1.6.0.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux