Nadav Amit <namit@xxxxxxxxxxxxxxxxx> writes: > x86 architecture defines differences between the reset and INIT sequences. > INIT does not initialize the FPU (including MMX, XMM, YMM, etc.), TSC, PMU, > MSRs (in general), MTRRs, machine-check, APIC ID, APIC arbitration ID and BSP. > > EFER is supposed NOT to be reset according to the SDM, but leaving the LMA and > LME untouched causes failed VM-entry. Therefore we reset EFER (although it is > unclear whether the rest of EFER bits should be reset). Thanks! This was actually in my todo list. #INIT and #RESET are actually separate pins on the processor. So, shouldn't we differentiate between the two too by having (*vcpu_init) and (*vcpu_reset) separate ? Bandan > References (from Intel SDM): > > "If the MP protocol has completed and a BSP is chosen, subsequent INITs (either > to a specific processor or system wide) do not cause the MP protocol to be > repeated." [8.4.2: MP Initialization Protocol Requirements and Restrictions] > > [Table 9-1. IA-32 Processor States Following Power-up, Reset, or INIT] > > "If the processor is reset by asserting the INIT# pin, the x87 FPU state is not > changed." [9.2: X87 FPU INITIALIZATION] > > "The state of the local APIC following an INIT reset is the same as it is after > a power-up or hardware reset, except that the APIC ID and arbitration ID > registers are not affected." 
[10.4.7.3: Local APIC State After an INIT Reset > (“Wait-for-SIPI” State)] > > Signed-off-by: Nadav Amit <namit@xxxxxxxxxxxxxxxxx> > --- > arch/x86/include/asm/kvm_host.h | 6 +++--- > arch/x86/kvm/lapic.c | 11 ++++++----- > arch/x86/kvm/lapic.h | 2 +- > arch/x86/kvm/svm.c | 2 +- > arch/x86/kvm/vmx.c | 30 +++++++++++++++++------------- > arch/x86/kvm/x86.c | 17 ++++++++++------- > 6 files changed, 38 insertions(+), 30 deletions(-) > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index bf5a160..59f4374 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -701,7 +701,7 @@ struct kvm_x86_ops { > /* Create, but do not attach this VCPU */ > struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id); > void (*vcpu_free)(struct kvm_vcpu *vcpu); > - void (*vcpu_reset)(struct kvm_vcpu *vcpu); > + void (*vcpu_reset)(struct kvm_vcpu *vcpu, bool init_event); > > void (*prepare_guest_switch)(struct kvm_vcpu *vcpu); > void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); > @@ -989,7 +989,7 @@ void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id); > > void kvm_inject_nmi(struct kvm_vcpu *vcpu); > > -int fx_init(struct kvm_vcpu *vcpu); > +int fx_init(struct kvm_vcpu *vcpu, bool init_event); > > void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, > const u8 *new, int bytes); > @@ -1134,7 +1134,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v); > int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); > int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); > int kvm_cpu_get_interrupt(struct kvm_vcpu *v); > -void kvm_vcpu_reset(struct kvm_vcpu *vcpu); > +void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event); > void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu); > void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, > unsigned long address); > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c > index bd4e34d..17da6fc 100644 > --- a/arch/x86/kvm/lapic.c > +++ 
b/arch/x86/kvm/lapic.c > @@ -1534,7 +1534,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) > > } > > -void kvm_lapic_reset(struct kvm_vcpu *vcpu) > +void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) > { > struct kvm_lapic *apic; > int i; > @@ -1548,7 +1548,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) > /* Stop the timer in case it's a reset to an active apic */ > hrtimer_cancel(&apic->lapic_timer.timer); > > - kvm_apic_set_id(apic, vcpu->vcpu_id); > + if (!init_event) > + kvm_apic_set_id(apic, vcpu->vcpu_id); > kvm_apic_set_version(apic->vcpu); > > for (i = 0; i < APIC_LVT_NUM; i++) > @@ -1689,7 +1690,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) > APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE); > > static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */ > - kvm_lapic_reset(vcpu); > + kvm_lapic_reset(vcpu, false); > kvm_iodevice_init(&apic->dev, &apic_mmio_ops); > > return 0; > @@ -2023,8 +2024,8 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu) > pe = xchg(&apic->pending_events, 0); > > if (test_bit(KVM_APIC_INIT, &pe)) { > - kvm_lapic_reset(vcpu); > - kvm_vcpu_reset(vcpu); > + kvm_lapic_reset(vcpu, true); > + kvm_vcpu_reset(vcpu, true); > if (kvm_vcpu_is_bsp(apic->vcpu)) > vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; > else > diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h > index 0bc6c65..e4c82dc 100644 > --- a/arch/x86/kvm/lapic.h > +++ b/arch/x86/kvm/lapic.h > @@ -48,7 +48,7 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu); > int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu); > int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu); > void kvm_apic_accept_events(struct kvm_vcpu *vcpu); > -void kvm_lapic_reset(struct kvm_vcpu *vcpu); > +void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event); > u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu); > void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8); > void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu); > diff --git a/arch/x86/kvm/svm.c 
b/arch/x86/kvm/svm.c > index 155534c..1ef4c0d 100644 > --- a/arch/x86/kvm/svm.c > +++ b/arch/x86/kvm/svm.c > @@ -1195,7 +1195,7 @@ static void init_vmcb(struct vcpu_svm *svm) > enable_gif(svm); > } > > -static void svm_vcpu_reset(struct kvm_vcpu *vcpu) > +static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) > { > struct vcpu_svm *svm = to_svm(vcpu); > u32 dummy; > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > index fdd9f8b..e9c94c6 100644 > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -4694,7 +4694,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) > return 0; > } > > -static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) > +static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) > { > struct vcpu_vmx *vmx = to_vmx(vcpu); > struct msr_data apic_base_msr; > @@ -4705,11 +4705,15 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) > > vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); > kvm_set_cr8(&vmx->vcpu, 0); > - apic_base_msr.data = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE; > - if (kvm_vcpu_is_bsp(&vmx->vcpu)) > - apic_base_msr.data |= MSR_IA32_APICBASE_BSP; > - apic_base_msr.host_initiated = true; > - kvm_set_apic_base(&vmx->vcpu, &apic_base_msr); > + > + if (!init_event) { > + apic_base_msr.data = APIC_DEFAULT_PHYS_BASE | > + MSR_IA32_APICBASE_ENABLE; > + if (kvm_vcpu_is_bsp(&vmx->vcpu)) > + apic_base_msr.data |= MSR_IA32_APICBASE_BSP; > + apic_base_msr.host_initiated = true; > + kvm_set_apic_base(&vmx->vcpu, &apic_base_msr); > + } > > vmx_segment_cache_clear(vmx); > > @@ -4733,9 +4737,12 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) > vmcs_write32(GUEST_LDTR_LIMIT, 0xffff); > vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082); > > - vmcs_write32(GUEST_SYSENTER_CS, 0); > - vmcs_writel(GUEST_SYSENTER_ESP, 0); > - vmcs_writel(GUEST_SYSENTER_EIP, 0); > + if (!init_event) { > + vmcs_write32(GUEST_SYSENTER_CS, 0); > + vmcs_writel(GUEST_SYSENTER_ESP, 0); > + vmcs_writel(GUEST_SYSENTER_EIP, 0); > + 
vmcs_write64(GUEST_IA32_DEBUGCTL, 0); > + } > > vmcs_writel(GUEST_RFLAGS, 0x02); > kvm_rip_write(vcpu, 0xfff0); > @@ -4750,14 +4757,11 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) > vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0); > vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0); > > - /* Special registers */ > - vmcs_write64(GUEST_IA32_DEBUGCTL, 0); > - > setup_msrs(vmx); > > vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */ > > - if (cpu_has_vmx_tpr_shadow()) { > + if (cpu_has_vmx_tpr_shadow() && !init_event) { > vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0); > if (vm_need_tpr_shadow(vmx->vcpu.kvm)) > vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index cc2c759..324e639 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -6961,7 +6961,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) > return 0; > } > > -int fx_init(struct kvm_vcpu *vcpu) > +int fx_init(struct kvm_vcpu *vcpu, bool init_event) > { > int err; > > @@ -6969,7 +6969,9 @@ int fx_init(struct kvm_vcpu *vcpu) > if (err) > return err; > > - fpu_finit(&vcpu->arch.guest_fpu); > + if (!init_event) > + fpu_finit(&vcpu->arch.guest_fpu); > + > if (cpu_has_xsaves) > vcpu->arch.guest_fpu.state->xsave.xsave_hdr.xcomp_bv = > host_xcr0 | XSTATE_COMPACTION_ENABLED; > @@ -7049,7 +7051,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) > r = vcpu_load(vcpu); > if (r) > return r; > - kvm_vcpu_reset(vcpu); > + kvm_vcpu_reset(vcpu, false); > kvm_mmu_setup(vcpu); > vcpu_put(vcpu); > > @@ -7087,7 +7089,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) > kvm_x86_ops->vcpu_free(vcpu); > } > > -void kvm_vcpu_reset(struct kvm_vcpu *vcpu) > +void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) > { > atomic_set(&vcpu->arch.nmi_queued, 0); > vcpu->arch.nmi_pending = 0; > @@ -7111,13 +7113,14 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu) > kvm_async_pf_hash_reset(vcpu); > vcpu->arch.apf.halted = false; > > - 
kvm_pmu_reset(vcpu); > + if (!init_event) > + kvm_pmu_reset(vcpu); > > memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs)); > vcpu->arch.regs_avail = ~0; > vcpu->arch.regs_dirty = ~0; > > - kvm_x86_ops->vcpu_reset(vcpu); > + kvm_x86_ops->vcpu_reset(vcpu, init_event); > } > > void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) > @@ -7299,7 +7302,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) > goto fail_free_mce_banks; > } > > - r = fx_init(vcpu); > + r = fx_init(vcpu, false); > if (r) > goto fail_free_wbinvd_dirty_mask; -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html