Re: [PATCH v3 2/5] KVM: nVMX: Rework event injection and recovery

Jan Kiszka <jan.kiszka@xxxxxx> · Wed, 10 Apr 2013 15:49:35 +0200

On 2013-04-10 15:42, Gleb Natapov wrote:
> On Sun, Mar 24, 2013 at 07:44:45PM +0100, Jan Kiszka wrote:
>> From: Jan Kiszka <jan.kiszka@xxxxxxxxxxx>
>>
>> The basic idea is to always transfer the pending event injection on
>> vmexit into the architectural state of the VCPU and then drop it from
>> there if it turns out that we left L2 to enter L1, i.e. if we enter
>> prepare_vmcs12.
>>
>> vmcs12_save_pending_event takes care to transfer pending L0 events into
>> the queue of L1. That is mandatory as L1 may decide to switch the guest
>> state completely, invalidating or preserving the pending events for
>> later injection (including on a different node, once we support
>> migration).
>>
>> This concept is based on the rule that a pending vmlaunch/vmresume is
>> not canceled. Otherwise, we would risk losing injected events or leaking
>> them into the wrong queues. Encode this rule via a WARN_ON_ONCE at the
>> entry of nested_vmx_vmexit.
>>
>> Signed-off-by: Jan Kiszka <jan.kiszka@xxxxxxxxxxx>
>> ---
>>  arch/x86/kvm/vmx.c |   90 +++++++++++++++++++++++++++++++++------------------
>>  1 files changed, 58 insertions(+), 32 deletions(-)
>>
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index 8827b3b..9d9ff74 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -6493,8 +6493,6 @@ static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
>>  
>>  static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
>>  {
>> -	if (is_guest_mode(&vmx->vcpu))
>> -		return;
>>  	__vmx_complete_interrupts(&vmx->vcpu, vmx->idt_vectoring_info,
>>  				  VM_EXIT_INSTRUCTION_LEN,
>>  				  IDT_VECTORING_ERROR_CODE);
>> @@ -6502,8 +6500,6 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
>>  
>>  static void vmx_cancel_injection(struct kvm_vcpu *vcpu)
>>  {
>> -	if (is_guest_mode(vcpu))
>> -		return;
>>  	__vmx_complete_interrupts(vcpu,
>>  				  vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
>>  				  VM_ENTRY_INSTRUCTION_LEN,
>> @@ -6535,21 +6531,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
>>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
>>  	unsigned long debugctlmsr;
>>  
>> -	if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending) {
>> -		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
>> -		if (vmcs12->idt_vectoring_info_field &
>> -				VECTORING_INFO_VALID_MASK) {
>> -			vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
>> -				vmcs12->idt_vectoring_info_field);
>> -			vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
>> -				vmcs12->vm_exit_instruction_len);
>> -			if (vmcs12->idt_vectoring_info_field &
>> -					VECTORING_INFO_DELIVER_CODE_MASK)
>> -				vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
>> -					vmcs12->idt_vectoring_error_code);
>> -		}
>> -	}
>> -
>>  	/* Record the guest's net vcpu time for enforced NMI injections. */
>>  	if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
>>  		vmx->entry_time = ktime_get();
>> @@ -6708,17 +6689,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
>>  
>>  	vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
>>  
>> -	if (is_guest_mode(vcpu)) {
>> -		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
>> -		vmcs12->idt_vectoring_info_field = vmx->idt_vectoring_info;
>> -		if (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) {
>> -			vmcs12->idt_vectoring_error_code =
>> -				vmcs_read32(IDT_VECTORING_ERROR_CODE);
>> -			vmcs12->vm_exit_instruction_len =
>> -				vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
>> -		}
>> -	}
>> -
>>  	vmx->loaded_vmcs->launched = 1;
>>  
>>  	vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
>> @@ -7325,6 +7295,48 @@ vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>>  			vcpu->arch.cr4_guest_owned_bits));
>>  }
>>  
>> +static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
>> +				       struct vmcs12 *vmcs12)
>> +{
>> +	u32 idt_vectoring;
>> +	unsigned int nr;
>> +
>> +	if (vcpu->arch.exception.pending) {
>> +		nr = vcpu->arch.exception.nr;
>> +		idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
>> +
>> +		if (kvm_exception_is_soft(nr)) {
>> +			vmcs12->vm_exit_instruction_len =
>> +				vcpu->arch.event_exit_inst_len;
>> +			idt_vectoring |= INTR_TYPE_SOFT_EXCEPTION;
>> +		} else
>> +			idt_vectoring |= INTR_TYPE_HARD_EXCEPTION;
>> +
>> +		if (vcpu->arch.exception.has_error_code) {
>> +			idt_vectoring |= VECTORING_INFO_DELIVER_CODE_MASK;
>> +			vmcs12->idt_vectoring_error_code =
>> +				vcpu->arch.exception.error_code;
>> +		}
>> +
>> +		vmcs12->idt_vectoring_info_field = idt_vectoring;
>> +	} else if (vcpu->arch.nmi_pending) {
>> +		vmcs12->idt_vectoring_info_field =
>> +			INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR;
>> +	} else if (vcpu->arch.interrupt.pending) {
>> +		nr = vcpu->arch.interrupt.nr;
>> +		idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
>> +
>> +		if (vcpu->arch.interrupt.soft) {
>> +			idt_vectoring |= INTR_TYPE_SOFT_INTR;
>> +			vmcs12->vm_entry_instruction_len =
>> +				vcpu->arch.event_exit_inst_len;
>> +		} else
>> +			idt_vectoring |= INTR_TYPE_EXT_INTR;
>> +
>> +		vmcs12->idt_vectoring_info_field = idt_vectoring;
>> +	}
>> +}
>> +
>>  /*
>>   * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits
>>   * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12),
>> @@ -7416,9 +7428,20 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>>  	vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
>>  	vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
>>  
>> -	/* clear vm-entry fields which are to be cleared on exit */
>>  	if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
>> -		vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK;
> Why have you dropped this? Where is it cleaned now?

Hmm, looks like I misread the field name as something like
"vm_exit_intr_info". Will restore the removed line and just improve the
comment.

Jan

Attachment:
signature.asc

Description: OpenPGP digital signature