On Wed, May 20, 2020 at 04:22:27PM -0700, Makarand Sonare wrote: > From: Peter Shier <pshier@xxxxxxxxxx> > > Add new field to hold preemption timer expiration deadline > appended to struct kvm_vmx_nested_state_hdr. This is to prevent > the first VM-Enter after migration from incorrectly restarting the timer > with the full timer value instead of partially decayed timer value. > KVM_SET_NESTED_STATE restarts timer using migrated state regardless > of whether L1 sets VM_EXIT_SAVE_VMX_PREEMPTION_TIMER. > > Fixes: cf8b84f48a593 ("kvm: nVMX: Prepare for checkpointing L2 state") > > Signed-off-by: Peter Shier <pshier@xxxxxxxxxx> > Signed-off-by: Makarand Sonare <makarandsonare@xxxxxxxxxx> > Change-Id: I6446aba5a547afa667f0ef4620b1b76115bf3753 > --- ... > diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c > index 51ebb60e1533a..46dc2ef731b37 100644 > --- a/arch/x86/kvm/vmx/nested.c > +++ b/arch/x86/kvm/vmx/nested.c > @@ -2092,9 +2092,9 @@ static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer) > return HRTIMER_NORESTART; > } > > -static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu) > +static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu, > + u64 preemption_timeout) > { > - u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value; > struct vcpu_vmx *vmx = to_vmx(vcpu); > > /* > @@ -3353,8 +3353,21 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, > * the timer. > */ > vmx->nested.preemption_timer_expired = false; > - if (nested_cpu_has_preemption_timer(vmcs12)) > - vmx_start_preemption_timer(vcpu); > + if (nested_cpu_has_preemption_timer(vmcs12)) { > + u64 timer_value = 0; Personal preference would be to zero this in an else case. More to make it obvious there isn't an uninitialized access than to shave cycles. Or get rid of timer_value altogether (see below). > + u64 l1_scaled_tsc_value = (kvm_read_l1_tsc(vcpu, rdtsc()) Dropping the "_value" can help avoid some wraps, i.e. 
use l1_scaled_tsc. > + >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE); Parentheses around the entire expression are unnecessary. And I strongly prefer the operator on the previous line, not sure how others feel. > + > + if (!vmx->nested.has_preemption_timer_deadline) { > + timer_value = vmcs12->vmx_preemption_timer_value; > + vmx->nested.preemption_timer_deadline = timer_value + > + l1_scaled_tsc_value; > + vmx->nested.has_preemption_timer_deadline = true; > + } else if (l1_scaled_tsc_value <= vmx->nested.preemption_timer_deadline) Not that it matters, but this can be '<'. > + timer_value = vmx->nested.preemption_timer_deadline - > + l1_scaled_tsc_value; > + vmx_start_preemption_timer(vcpu, timer_value); Any objection to moving this to a separate helper? It'd reduce the indentation enough that, if combined with shorter names, would eliminate the line wraps and yield a nice diff to boot. E.g. something like: diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 6a4f06c2e741..d204aa0910c2 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2093,9 +2093,9 @@ static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer) return HRTIMER_NORESTART; } -static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu) +static void __vmx_start_preemption_timer(struct kvm_vcpu *vcpu, + u64 preemption_timeout) { - u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value; struct vcpu_vmx *vmx = to_vmx(vcpu); /* @@ -2117,6 +2117,27 @@ static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu) ns_to_ktime(preemption_timeout), HRTIMER_MODE_REL); } +static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + u64 l1_scaled_tsc = kvm_read_l1_tsc(vcpu, rdtsc()) >> + VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE; + u64 val; + + if (!vmx->nested.has_preemption_timer_deadline) { + val = vmcs12->vmx_preemption_timer_value; + 
vmx->nested.has_preemption_timer_deadline = true; + vmx->nested.preemption_timer_deadline = val + l1_scaled_tsc; + } else if (vmx->nested.preemption_timer_deadline > l1_scaled_tsc) { + val = vmx->nested.preemption_timer_deadline - l1_scaled_tsc; + } else { + val = 0; + } + __vmx_start_preemption_timer(vcpu, val); +} + + static u64 nested_vmx_calc_efer(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) { if (vmx->nested.nested_run_pending && @@ -3358,7 +3379,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, */ vmx->nested.preemption_timer_expired = false; if (nested_cpu_has_preemption_timer(vmcs12)) - vmx_start_preemption_timer(vcpu); + vmx_start_preemption_timer(vcpu, vmcs12); /* * Note no nested_vmx_succeed or nested_vmx_fail here. At this point > + } > > /* > * Note no nested_vmx_succeed or nested_vmx_fail here. At this point > @@ -3462,6 +3475,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) > * the nested entry. > */ > vmx->nested.nested_run_pending = 1; > + vmx->nested.has_preemption_timer_deadline = false; > status = nested_vmx_enter_non_root_mode(vcpu, true); > if (unlikely(status != NVMX_VMENTRY_SUCCESS)) > goto vmentry_failed; > @@ -3962,9 +3976,11 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) > vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE; > > if (nested_cpu_has_preemption_timer(vmcs12) && > - vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) > + !vmx->nested.nested_run_pending) { > + if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) The extra if statement is unnecessary, which in turn makes the curly braces unnecessary. 
> vmcs12->vmx_preemption_timer_value = > vmx_get_preemption_timer_value(vcpu); > + } > > /* > * In some cases (usually, nested EPT), L2 is allowed to change its > @@ -5898,6 +5914,7 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu, > .size = sizeof(kvm_state), > .hdr.vmx.vmxon_pa = -1ull, > .hdr.vmx.vmcs12_pa = -1ull, > + .hdr.vmx.preemption_timer_deadline = 0, > }; > struct kvm_vmx_nested_state_data __user *user_vmx_nested_state = > &user_kvm_nested_state->data.vmx[0];