Re: [PATCH 2/4] KVM: nVMX: track dirty state of non-shadowed VMCS fields

Wanpeng Li <kernellwp@xxxxxxxxx> · Mon, 25 Dec 2017 11:03:55 +0800

2017-12-21 20:43 GMT+08:00 Paolo Bonzini <pbonzini@xxxxxxxxxx>:
> VMCS12 fields that are not handled through shadow VMCS are rarely
> written, and thus they are also almost constant in the vmcs02.  We can
> thus optimize prepare_vmcs02 by skipping all the work for non-shadowed
> fields in the common case.
>
> This patch introduces the (pretty simple) tracking infrastructure; the
> next patches will move work to prepare_vmcs02_full and save a few hundred
> clock cycles per VMRESUME on a Haswell Xeon E5 system:
>
>                                         before  after
>         cpuid                           14159   13869
>         vmcall                          15290   14951
>         inl_from_kernel                 17703   17447
>         outl_to_kernel                  16011   14692
>         self_ipi_sti_nop                16763   15825
>         self_ipi_tpr_sti_nop            17341   15935
>         wr_tsc_adjust_msr               14510   14264
>         rd_tsc_adjust_msr               15018   14311
>         mmio-wildcard-eventfd:pci-mem   16381   14947
>         mmio-datamatch-eventfd:pci-mem  18620   17858
>         portio-wildcard-eventfd:pci-io  15121   14769
>         portio-datamatch-eventfd:pci-io 15761   14831
>
> (average savings 748, stdev 460).
>
> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> ---
>  arch/x86/kvm/vmx.c | 29 ++++++++++++++++++++++++++++-
>  1 file changed, 28 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 2ee842990976..8b6013b529b3 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -441,6 +441,7 @@ struct nested_vmx {
>          * data hold by vmcs12
>          */
>         bool sync_shadow_vmcs;
> +       bool dirty_vmcs12;
>
>         bool change_vmcs01_virtual_x2apic_mode;
>         /* L2 must run next, and mustn't decide to exit to L1. */
> @@ -7879,8 +7880,10 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
>  {
>         unsigned long field;
>         gva_t gva;
> +       struct vcpu_vmx *vmx = to_vmx(vcpu);
>         unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
>         u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
> +
>         /* The value to write might be 32 or 64 bits, depending on L1's long
>          * mode, and eventually we need to write that into a field of several
>          * possible lengths. The code below first zero-extends the value to 64
> @@ -7923,6 +7926,20 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
>                 return kvm_skip_emulated_instruction(vcpu);
>         }
>
> +       switch (field) {
> +#define SHADOW_FIELD_RW(x) case x:
> +#include "vmx_shadow_fields.h"

What's will happen here if enable_shadow_vmcs == false?

Regards,
Wanpeng Li

> +               /*
> +                * The fields that can be updated by L1 without a vmexit are
> +                * always updated in the vmcs02, the others go down the slow
> +                * path of prepare_vmcs02.
> +                */
> +               break;
> +       default:
> +               vmx->nested.dirty_vmcs12 = true;
> +               break;
> +       }
> +
>         nested_vmx_succeed(vcpu);
>         return kvm_skip_emulated_instruction(vcpu);
>  }
> @@ -7937,6 +7954,7 @@ static void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr)
>                              __pa(vmx->vmcs01.shadow_vmcs));
>                 vmx->nested.sync_shadow_vmcs = true;
>         }
> +       vmx->nested.dirty_vmcs12 = true;
>  }
>
>  /* Emulate the VMPTRLD instruction */
> @@ -10569,6 +10587,11 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne
>         return 0;
>  }
>
> +static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
> +                              bool from_vmentry)
> +{
> +}
> +
>  /*
>   * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
>   * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
> @@ -10864,7 +10887,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
>                         vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
>                         vmx_flush_tlb(vcpu, true);
>                 }
> -
>         }
>
>         if (enable_pml) {
> @@ -10913,6 +10935,11 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
>         /* Note: modifies VM_ENTRY/EXIT_CONTROLS and GUEST/HOST_IA32_EFER */
>         vmx_set_efer(vcpu, vcpu->arch.efer);
>
> +       if (vmx->nested.dirty_vmcs12) {
> +               prepare_vmcs02_full(vcpu, vmcs12, from_vmentry);
> +               vmx->nested.dirty_vmcs12 = false;
> +       }
> +
>         /* Shadow page tables on either EPT or shadow page tables. */
>         if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12),
>                                 entry_failure_code))
> --
> 1.8.3.1
>
>