On Wed, May 20, 2020 at 04:22:27PM -0700, Makarand Sonare wrote: > From: Peter Shier <pshier@xxxxxxxxxx> > > Add new field to hold preemption timer expiration deadline > appended to struct kvm_vmx_nested_state_hdr. This is to prevent > the first VM-Enter after migration from incorrectly restarting the timer > with the full timer value instead of partially decayed timer value. > KVM_SET_NESTED_STATE restarts timer using migrated state regardless > of whether L1 sets VM_EXIT_SAVE_VMX_PREEMPTION_TIMER. > > Fixes: cf8b84f48a593 ("kvm: nVMX: Prepare for checkpointing L2 state") > > Signed-off-by: Peter Shier <pshier@xxxxxxxxxx> > Signed-off-by: Makarand Sonare <makarandsonare@xxxxxxxxxx> > Change-Id: I6446aba5a547afa667f0ef4620b1b76115bf3753 > --- ... > diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c > index 51ebb60e1533a..46dc2ef731b37 100644 > --- a/arch/x86/kvm/vmx/nested.c > +++ b/arch/x86/kvm/vmx/nested.c > @@ -2092,9 +2092,9 @@ static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer) > return HRTIMER_NORESTART; > } > > -static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu) > +static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu, > + u64 preemption_timeout) > { > - u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value; > struct vcpu_vmx *vmx = to_vmx(vcpu); > > /* > @@ -3353,8 +3353,21 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, > * the timer. > */ > vmx->nested.preemption_timer_expired = false; > - if (nested_cpu_has_preemption_timer(vmcs12)) > - vmx_start_preemption_timer(vcpu); > + if (nested_cpu_has_preemption_timer(vmcs12)) { > + u64 timer_value = 0; Personal preference would be to zero this in an else case. More to make it obvious there isn't an uninitialized access than to shave cycles. Or get rid of timer_value altogether (see below). > + u64 l1_scaled_tsc_value = (kvm_read_l1_tsc(vcpu, rdtsc()) Dropping the "_value" can help avoid some wraps, i.e. 
use l1_scaled_tsc. > + >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE); Parentheses around the entire expression are unnecessary. And I strongly prefer the operator on the previous line, not sure how others feel. > + > + if (!vmx->nested.has_preemption_timer_deadline) { > + timer_value = vmcs12->vmx_preemption_timer_value; > + vmx->nested.preemption_timer_deadline = timer_value + > + l1_scaled_tsc_value; > + vmx->nested.has_preemption_timer_deadline = true; > + } else if (l1_scaled_tsc_value <= vmx->nested.preemption_timer_deadline) Not that it matters, but this can be '<'. > + timer_value = vmx->nested.preemption_timer_deadline - > + l1_scaled_tsc_value; > + vmx_start_preemption_timer(vcpu, timer_value); Any objection to moving this to a separate helper? It'd reduce the indentation enough that, if combined with shorter names, would eliminate the line wraps and yield a nice diff to boot. E.g. something like: diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 6a4f06c2e741..d204aa0910c2 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2093,9 +2093,9 @@ static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer) return HRTIMER_NORESTART; } -static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu) +static void __vmx_start_preemption_timer(struct kvm_vcpu *vcpu, + u64 preemption_timeout) { - u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value; struct vcpu_vmx *vmx = to_vmx(vcpu); /* @@ -2117,6 +2117,27 @@ static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu) ns_to_ktime(preemption_timeout), HRTIMER_MODE_REL); } +static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + u64 l1_scaled_tsc = kvm_read_l1_tsc(vcpu, rdtsc()) >> + VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE; + u64 val; + + if (!vmx->nested.has_preemption_timer_deadline) { + val = vmcs12->vmx_preemption_timer_value; + 
vmx->nested.has_preemption_timer_deadline = true; + vmx->nested.preemption_timer_deadline = val + l1_scaled_tsc; + } else if (vmx->nested.preemption_timer_deadline > l1_scaled_tsc) { + val = vmx->nested.preemption_timer_deadline - l1_scaled_tsc; + } else { + val = 0; + } + __vmx_start_preemption_timer(vcpu, val); +} + + static u64 nested_vmx_calc_efer(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) { if (vmx->nested.nested_run_pending && @@ -3358,7 +3379,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, */ vmx->nested.preemption_timer_expired = false; if (nested_cpu_has_preemption_timer(vmcs12)) - vmx_start_preemption_timer(vcpu); + vmx_start_preemption_timer(vcpu, vmcs12); /* * Note no nested_vmx_succeed or nested_vmx_fail here. At this point > + } > > /* > * Note no nested_vmx_succeed or nested_vmx_fail here. At this point > @@ -3462,6 +3475,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) > * the nested entry. > */ > vmx->nested.nested_run_pending = 1; > + vmx->nested.has_preemption_timer_deadline = false; > status = nested_vmx_enter_non_root_mode(vcpu, true); > if (unlikely(status != NVMX_VMENTRY_SUCCESS)) > goto vmentry_failed; > @@ -3962,9 +3976,11 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) > vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE; > > if (nested_cpu_has_preemption_timer(vmcs12) && > - vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) > + !vmx->nested.nested_run_pending) { > + if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) The extra if statement is unnecessary, which in turn makes the curly braces unnecessary. 
> vmcs12->vmx_preemption_timer_value = > vmx_get_preemption_timer_value(vcpu); > + } > > /* > * In some cases (usually, nested EPT), L2 is allowed to change its > @@ -5898,6 +5914,7 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu, > .size = sizeof(kvm_state), > .hdr.vmx.vmxon_pa = -1ull, > .hdr.vmx.vmcs12_pa = -1ull, > + .hdr.vmx.preemption_timer_deadline = 0, > }; > struct kvm_vmx_nested_state_data __user *user_vmx_nested_state = > &user_kvm_nested_state->data.vmx[0];