Reviewed-by: Jim Mattson <jmattson@xxxxxxxxxx> On Thu, Dec 21, 2017 at 4:43 AM, Paolo Bonzini <pbonzini@xxxxxxxxxx> wrote: > VMCS12 fields that are not handled through shadow VMCS are rarely > written, and thus they are also almost constant in the vmcs02. We can > thus optimize prepare_vmcs02 by skipping all the work for non-shadowed > fields in the common case. > > This patch introduces the (pretty simple) tracking infrastructure; the > next patches will move work to prepare_vmcs02_full and save a few hundred > clock cycles per VMRESUME on a Haswell Xeon E5 system: > > before after > cpuid 14159 13869 > vmcall 15290 14951 > inl_from_kernel 17703 17447 > outl_to_kernel 16011 14692 > self_ipi_sti_nop 16763 15825 > self_ipi_tpr_sti_nop 17341 15935 > wr_tsc_adjust_msr 14510 14264 > rd_tsc_adjust_msr 15018 14311 > mmio-wildcard-eventfd:pci-mem 16381 14947 > mmio-datamatch-eventfd:pci-mem 18620 17858 > portio-wildcard-eventfd:pci-io 15121 14769 > portio-datamatch-eventfd:pci-io 15761 14831 > > (average savings 748, stdev 460). > > Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx> > --- > arch/x86/kvm/vmx.c | 29 ++++++++++++++++++++++++++++- > 1 file changed, 28 insertions(+), 1 deletion(-) > > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > index 2ee842990976..8b6013b529b3 100644 > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -441,6 +441,7 @@ struct nested_vmx { > * data hold by vmcs12 > */ > bool sync_shadow_vmcs; > + bool dirty_vmcs12; > > bool change_vmcs01_virtual_x2apic_mode; > /* L2 must run next, and mustn't decide to exit to L1. */ > @@ -7879,8 +7880,10 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) > { > unsigned long field; > gva_t gva; > + struct vcpu_vmx *vmx = to_vmx(vcpu); > unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); > u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); > + > /* The value to write might be 32 or 64 bits, depending on L1's long > * mode, and eventually we need to write that into a field of several > * possible lengths. The code below first zero-extends the value to 64 > @@ -7923,6 +7926,20 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) > return kvm_skip_emulated_instruction(vcpu); > } > > + switch (field) { > +#define SHADOW_FIELD_RW(x) case x: > +#include "vmx_shadow_fields.h" > + /* > + * The fields that can be updated by L1 without a vmexit are > + * always updated in the vmcs02, the others go down the slow > + * path of prepare_vmcs02. > + */ > + break; > + default: > + vmx->nested.dirty_vmcs12 = true; > + break; > + } > + > nested_vmx_succeed(vcpu); > return kvm_skip_emulated_instruction(vcpu); > } > @@ -7937,6 +7954,7 @@ static void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr) > __pa(vmx->vmcs01.shadow_vmcs)); > vmx->nested.sync_shadow_vmcs = true; > } > + vmx->nested.dirty_vmcs12 = true; > } > > /* Emulate the VMPTRLD instruction */ > @@ -10569,6 +10587,11 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne > return 0; > } > > +static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, > + bool from_vmentry) > +{ > +} > + > /* > * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested > * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it > @@ -10864,7 +10887,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, > vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); > vmx_flush_tlb(vcpu, true); > } > - > } > > if (enable_pml) { > @@ -10913,6 +10935,11 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, > /* Note: modifies VM_ENTRY/EXIT_CONTROLS and GUEST/HOST_IA32_EFER */ > vmx_set_efer(vcpu, vcpu->arch.efer); > > + if (vmx->nested.dirty_vmcs12) { > + prepare_vmcs02_full(vcpu, vmcs12, from_vmentry); > + vmx->nested.dirty_vmcs12 = false; > + } > + > /* Shadow page tables on either EPT or shadow page tables. */ > if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12), > entry_failure_code)) > -- > 1.8.3.1 > >