On 2013-09-26 17:04, Paolo Bonzini wrote: > Il 16/09/2013 10:11, Arthur Chunqi Li ha scritto: >> This patch contains the following two changes: >> 1. Fix the bug in nested preemption timer support. If vmexit L2->L0 >> with some reasons not emulated by L1, preemption timer value should >> be save in such exits. >> 2. Add support of "Save VMX-preemption timer value" VM-Exit controls >> to nVMX. >> >> With this patch, nested VMX preemption timer features are fully >> supported. >> >> Signed-off-by: Arthur Chunqi Li <yzt356@xxxxxxxxx> >> --- >> ChangeLog to v4: >> Format changes and remove a flag in nested_vmx. >> arch/x86/include/uapi/asm/msr-index.h | 1 + >> arch/x86/kvm/vmx.c | 44 +++++++++++++++++++++++++++++++-- >> 2 files changed, 43 insertions(+), 2 deletions(-) > > Hi all, > > the test fails for me if the preemption timer value is set to a value > that is above ~2000 (which means ~65000 TSC cycles on this machine). > The preemption timer seems to count faster than what is expected, for > example only up to 4 million cycles if you set it to one million. > So, I am leaving the patch out of kvm/queue for now, until I can > test it on more processors. So this behaviour is a regression of this patch (and your own version as well)? > > I tried redoing the patch with a completely different logic, > and I get exactly the same behavior and the same wrong values for the > TSC. My patch (below is the diff from Arthur's) is a bit simpler; like > Arthur I run L2 with the save-timer control set, but I do not need any > adjustment upon entry to L2 because I just use the value left by the > processor in the VMCS. Also, in your patch L1 is also running with > the preemption timer, if I understand the code correctly. > > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -6730,27 +6731,6 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *in > *info2 = vmcs_read32(VM_EXIT_INTR_INFO); > } > > -static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu) > -{ > - u64 delta_tsc_l1; > - u32 preempt_val_l1, preempt_val_l2, preempt_scale; > - > - if (!(get_vmcs12(vcpu)->pin_based_vm_exec_control & > - PIN_BASED_VMX_PREEMPTION_TIMER)) > - return; > - preempt_scale = native_read_msr(MSR_IA32_VMX_MISC) & > - MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE; > - preempt_val_l2 = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); > - delta_tsc_l1 = vmx_read_l1_tsc(vcpu, native_read_tsc()) > - - vcpu->arch.last_guest_tsc; > - preempt_val_l1 = delta_tsc_l1 >> preempt_scale; > - if (preempt_val_l2 <= preempt_val_l1) > - preempt_val_l2 = 0; > - else > - preempt_val_l2 -= preempt_val_l1; > - vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2); > -} > - > /* > * The guest has exited. See if we can fix it or if we need userspace > * assistance. > @@ -7161,8 +7141,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) > atomic_switch_perf_msrs(vmx); > debugctlmsr = get_debugctlmsr(); > > - if (is_guest_mode(vcpu) && !(vmx->nested.nested_run_pending)) > - nested_adjust_preemption_timer(vcpu); > vmx->__launched = vmx->loaded_vmcs->launched; > asm( > /* Store host registers */ > @@ -7294,6 +7272,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) > | (1 << VCPU_EXREG_CR3)); > vcpu->arch.regs_dirty = 0; > > + if (is_guest_mode(vcpu)) { > + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); > + if ((vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) && > + (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) { > + vmcs12->vmx_preemption_timer_value = > + vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); Is the preemption timer guaranteed to continue to tick while in L0? I think this is what you are relying on here, right? Jan > + } > + } > + > vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); > > vmx->loaded_vmcs->launched = 1; > @@ -7633,7 +7633,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) > vmcs_write64(VMCS_LINK_POINTER, -1ull); > > vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, > - (vmcs_config.pin_based_exec_ctrl | > + (vmx_pin_based_exec_ctrl(vmx) | > vmcs12->pin_based_vm_exec_control)); > > if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) > @@ -8159,13 +8146,6 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) > vmcs12->guest_pending_dbg_exceptions = > vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); > > - if ((vmcs12->pin_based_vm_exec_control & > - PIN_BASED_VMX_PREEMPTION_TIMER) && > - (vmcs12->vm_exit_controls & > - VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) > - vmcs12->vmx_preemption_timer_value = > - vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); > - > /* > * In some cases (usually, nested EPT), L2 is allowed to change its > * own CR3 without exiting. If it has changed it, we must keep it. > @@ -8396,6 +8375,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) > > load_vmcs12_host_state(vcpu, vmcs12); > > + vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx)); > + > /* Update TSC_OFFSET if TSC was changed while L2 ran */ > vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset); > > > Also note that I used vmx_pin_based_exec_ctrl instead of > vmcs_config.pin_based_exec_ctrl. Can anyone spot anything wrong > in this code? > > Paolo >
Attachment:
signature.asc
Description: OpenPGP digital signature