Glauber Costa wrote: > When we create a new vcpu, we need to make sure that > all of the state it is going to use (apic state, for example) > already exists. We can do it nicely by making sure kvm_init_vcpu > is executed after everything else in cpu creation. > > After that, the first call to KVM_SET_LAPIC ioctl will not find an > existent vcpu. So we introduce a function that tells us that the vcpu > is already initialized, and it is safe to call the ioctl. > > We force the execution of the KVM_SET_LAPIC from within the new > vcpu thread, that will replace this first initialization call. > > Signed-off-by: Glauber Costa <glommer@xxxxxxxxxx> > --- > hw/apic.c | 21 +++++++++++---------- > hw/pc.c | 1 + > qemu-kvm.c | 10 ++++++++++ > qemu-kvm.h | 4 ++++ > target-i386/helper.c | 2 -- > 5 files changed, 26 insertions(+), 12 deletions(-) > > diff --git a/hw/apic.c b/hw/apic.c > index 466fb7e..b7cd18e 100644 > --- a/hw/apic.c > +++ b/hw/apic.c > @@ -891,6 +891,15 @@ static void kvm_kernel_lapic_load_from_user(APICState *s) > > #endif > > +void qemu_kvm_load_lapic(CPUState *env) > +{ > +#ifdef KVM_CAP_IRQCHIP > + if (kvm_enabled() && kvm_vcpu_inited(env) && qemu_kvm_irqchip_in_kernel()) { > + kvm_kernel_lapic_load_from_user(env->apic_state); > + } > +#endif > +} > + > static void apic_save(QEMUFile *f, void *opaque) > { > APICState *s = opaque; > @@ -965,11 +974,7 @@ static int apic_load(QEMUFile *f, void *opaque, int version_id) > if (version_id >= 2) > qemu_get_timer(f, s->timer); > > -#ifdef KVM_CAP_IRQCHIP > - if (kvm_enabled() && qemu_kvm_irqchip_in_kernel()) { > - kvm_kernel_lapic_load_from_user(s); > - } > -#endif > + qemu_kvm_load_lapic(s->cpu_env); > > return 0; > } > @@ -991,11 +996,7 @@ static void apic_reset(void *opaque) > */ > s->lvt[APIC_LVT_LINT0] = 0x700; > } > -#ifdef KVM_CAP_IRQCHIP > - if (kvm_enabled() && qemu_kvm_irqchip_in_kernel()) { > - kvm_kernel_lapic_load_from_user(s); > - } > -#endif > + qemu_kvm_load_lapic(s->cpu_env); > } > > static 
CPUReadMemoryFunc *apic_mem_read[3] = { > diff --git a/hw/pc.c b/hw/pc.c > index 34a4d25..1675510 100644 > --- a/hw/pc.c > +++ b/hw/pc.c > @@ -854,6 +854,7 @@ CPUState *pc_new_cpu(int cpu, const char *cpu_model, int pci_enabled) > if (pci_enabled) { > apic_init(env); > } > + qemu_init_vcpu(env); > return env; Yeah, it always looks funny when you patch mis-formatted code... > } > > diff --git a/qemu-kvm.c b/qemu-kvm.c > index 8c0d463..8fd80c1 100644 > --- a/qemu-kvm.c > +++ b/qemu-kvm.c > @@ -435,6 +435,9 @@ static void *ap_main_loop(void *_env) > kvm_create_vcpu(kvm_context, env->cpu_index); > kvm_qemu_init_env(env); > > + /* APIC state creation takes place before we get here. So despite the fact that > + * apic_reset() (called by apic_init) will also load the apic state, we have to redo it here > + */ > #ifdef USE_KVM_DEVICE_ASSIGNMENT > /* do ioperm for io ports of assigned devices */ > LIST_FOREACH(data, &ioperm_head, entries) > @@ -446,6 +449,8 @@ static void *ap_main_loop(void *_env) > current_env->kvm_cpu_state.created = 1; > pthread_cond_signal(&qemu_vcpu_cond); > > + qemu_kvm_load_lapic(env); > + This feels strange after a first glance, I need to look closer... Ah wait, found one reason for this feeling: APIC is x86 stuff, but you are patching generic code. 
> /* and wait for machine initialization */ > while (!qemu_system_ready) > qemu_cond_wait(&qemu_system_cond); > @@ -463,6 +468,11 @@ void kvm_init_vcpu(CPUState *env) > qemu_cond_wait(&qemu_vcpu_cond); > } > > +int kvm_vcpu_inited(CPUState *env) > +{ > + return env->kvm_cpu_state.created; > +} > + > int kvm_init_ap(void) > { > #ifdef TARGET_I386 > diff --git a/qemu-kvm.h b/qemu-kvm.h > index c0549df..6fa9d5a 100644 > --- a/qemu-kvm.h > +++ b/qemu-kvm.h > @@ -16,6 +16,7 @@ int kvm_main_loop(void); > int kvm_qemu_init(void); > int kvm_qemu_create_context(void); > int kvm_init_ap(void); > +int kvm_vcpu_inited(CPUState *env); > void kvm_qemu_destroy(void); > void kvm_load_registers(CPUState *env); > void kvm_save_registers(CPUState *env); > @@ -31,6 +32,9 @@ int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap); > int kvm_qemu_init_env(CPUState *env); > int kvm_qemu_check_extension(int ext); > void kvm_apic_init(CPUState *env); > +/* called from vcpu initialization */ > +void qemu_kvm_load_lapic(CPUState *env); > + > int kvm_set_irq(int irq, int level, int *status); > > int kvm_physical_memory_set_dirty_tracking(int enable); > diff --git a/target-i386/helper.c b/target-i386/helper.c > index 719e31e..511b48c 100644 > --- a/target-i386/helper.c > +++ b/target-i386/helper.c > @@ -1696,7 +1696,5 @@ CPUX86State *cpu_x86_init(const char *cpu_model) > kqemu_init(env); > #endif > > - qemu_init_vcpu(env); > - > return env; > } The reordering of qemu_init_vcpu could also simplify reset management (I have a patch pending that adds a kvm hook to apic reset for solving it within the existing scheme). But I would suggest getting an ack from upstream first, or, even better, merging this pattern there and then adjusting qemu-kvm. The other way around is asking for trouble if qemu sticks with a different approach. 
Jan -- Siemens AG, Corporate Technology, CT SE 2 Corporate Competence Center Embedded Linux -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html