Hi Jim,

Some comments below.

Jim Mattson <jmattson@xxxxxxxxxx> writes:

> If a shadow VMCS is referenced by the VMCS link pointer in the
> current VMCS, then VM-entry makes the shadow VMCS active on the
> current processor. No VMCS should ever be active on more than one
> processor. If a VMCS is to be migrated from one processor to
> another, software should execute a VMCLEAR for the VMCS on the
> first processor before the VMCS is made active on the second
> processor.
>
> We already keep track of ordinary VMCSs that are made active by
> VMPTRLD. Extend that tracking to handle shadow VMCSs that are
> made active by VM-entry to their parents.
>
> Signed-off-by: Jim Mattson <jmattson@xxxxxxxxxx>
>
>  arch/x86/kvm/vmx.c | 73 +++++++++++++++++++++++++++++++++++++++---------------
>  1 file changed, 53 insertions(+), 20 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 64a79f2..dd38521 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -398,7 +398,7 @@ struct nested_vmx {
>  	/* The host-usable pointer to the above */
>  	struct page *current_vmcs12_page;
>  	struct vmcs12 *current_vmcs12;
> -	struct vmcs *current_shadow_vmcs;
> +	struct loaded_vmcs current_shadow_vmcs;
>  	/*
>  	 * Indicates if the shadow vmcs must be updated with the
>  	 * data hold by vmcs12
> @@ -2113,6 +2113,15 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
>  			new.control) != old.control);
>  }
>
> +static void record_loaded_vmcs(struct loaded_vmcs *loaded_vmcs, int cpu)
> +{
> +	if (loaded_vmcs->vmcs) {
> +		list_add(&loaded_vmcs->loaded_vmcss_on_cpu_link,
> +			 &per_cpu(loaded_vmcss_on_cpu, cpu));
> +		loaded_vmcs->cpu = cpu;
> +	}
> +}
> +
>  /*
>   * Switches to specified vcpu, until a matching vcpu_put(), but assumes
>   * vcpu mutex is already taken.
> @@ -2124,8 +2133,11 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>
>  	if (!vmm_exclusive)
>  		kvm_cpu_vmxon(phys_addr);
> -	else if (vmx->loaded_vmcs->cpu != cpu)
> +	else if (vmx->loaded_vmcs->cpu != cpu) {
>  		loaded_vmcs_clear(vmx->loaded_vmcs);
> +		if (vmx->nested.current_shadow_vmcs.vmcs)
> +			loaded_vmcs_clear(&vmx->nested.current_shadow_vmcs);
> +	}
>
>  	if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
>  		per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
> @@ -2147,8 +2159,8 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>  		 */
>  		smp_rmb();
>
> -		list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link,
> -			 &per_cpu(loaded_vmcss_on_cpu, cpu));
> +		record_loaded_vmcs(vmx->loaded_vmcs, cpu);
> +		record_loaded_vmcs(&vmx->nested.current_shadow_vmcs, cpu);
>  		crash_enable_local_vmclear(cpu);
>  		local_irq_enable();
>
> @@ -2161,8 +2173,6 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>
>  		rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
>  		vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
> -
> -		vmx->loaded_vmcs->cpu = cpu;
>  	}

Is the order of setting loaded_vmcs->cpu important here? Your patch
changed it; I can't think of anything wrong with it, however...
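To make the ordering question concrete: as far as I can see, the main
consumer of ->cpu outside of vcpu_load is loaded_vmcs_clear(), which uses
it to target the VMCLEAR IPI. Quoting it roughly from memory, so please
double-check against the tree; this is not part of the patch:

	static void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs)
	{
		int cpu = loaded_vmcs->cpu;

		/* -1 means "not active anywhere"; otherwise VMCLEAR remotely */
		if (cpu != -1)
			smp_call_function_single(cpu,
				 __loaded_vmcs_clear, loaded_vmcs, 1);
	}

If that is indeed the only other reader, then moving the assignment into
the IRQ-disabled, crash-vmclear-disabled window right next to the
list_add looks, if anything, tighter than the old placement at the tail
of vmx_vcpu_load().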
>  	/* Setup TSC multiplier */
> @@ -6812,6 +6822,34 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
>  	return 0;
>  }
>
> +static int setup_shadow_vmcs(struct vcpu_vmx *vmx)
> +{
> +	struct vmcs *shadow_vmcs;
> +	int cpu;
> +
> +	shadow_vmcs = alloc_vmcs();
> +	if (!shadow_vmcs)
> +		return -ENOMEM;
> +
> +	/* mark vmcs as shadow */
> +	shadow_vmcs->revision_id |= (1u << 31);
> +	/* init shadow vmcs */
> +	vmx->nested.current_shadow_vmcs.vmcs = shadow_vmcs;
> +	loaded_vmcs_init(&vmx->nested.current_shadow_vmcs);
> +
> +	cpu = get_cpu();
> +	local_irq_disable();
> +	crash_disable_local_vmclear(cpu);
> +
> +	record_loaded_vmcs(&vmx->nested.current_shadow_vmcs, cpu);
> +
> +	crash_enable_local_vmclear(cpu);
> +	local_irq_enable();
> +	put_cpu();
> +
> +	return 0;
> +}
> +
>  /*
>   * Emulate the VMXON instruction.
>   * Currently, we just remember that VMX is active, and do not save or even
> @@ -6824,7 +6862,6 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
>  {
>  	struct kvm_segment cs;
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
> -	struct vmcs *shadow_vmcs;
>  	const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED
>  		| FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
>
> @@ -6867,14 +6904,9 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
>  	}
>
>  	if (enable_shadow_vmcs) {
> -		shadow_vmcs = alloc_vmcs();
> -		if (!shadow_vmcs)
> -			return -ENOMEM;
> -		/* mark vmcs as shadow */
> -		shadow_vmcs->revision_id |= (1u << 31);
> -		/* init shadow vmcs */
> -		vmcs_clear(shadow_vmcs);
> -		vmx->nested.current_shadow_vmcs = shadow_vmcs;
> +		int ret = setup_shadow_vmcs(vmx);
> +		if (ret)
> +			return ret;

Nit:

	if (setup_shadow_vmcs(vmx))
		return -ENOMEM;

Also, isn't it better to actually add the shadow vmcs to the list in
handle_vmptrld than in handle_vmon? That is where shadowing is actually
enabled and the link pointer is set.
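Something like the sketch below is what I have in mind. It is untested,
and record_shadow_vmcs() is a made-up name; the locking simply mirrors
setup_shadow_vmcs() from your patch:

	/*
	 * Hypothetical helper: do the per-cpu list insertion when shadowing
	 * is actually turned on in handle_vmptrld(), not at VMXON time.
	 */
	static void record_shadow_vmcs(struct vcpu_vmx *vmx)
	{
		int cpu = get_cpu();	/* disable preemption, pin to a cpu */

		local_irq_disable();
		crash_disable_local_vmclear(cpu);
		record_loaded_vmcs(&vmx->nested.current_shadow_vmcs, cpu);
		crash_enable_local_vmclear(cpu);
		local_irq_enable();
		put_cpu();
	}

handle_vmon() would then only allocate and init the shadow vmcs, and the
enable_shadow_vmcs branch of handle_vmptrld() would call this helper just
before writing VMCS_LINK_POINTER.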
>  	}
>
>  	INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool));
> @@ -6959,7 +6991,7 @@ static void free_nested(struct vcpu_vmx *vmx)
>  		free_vpid(vmx->nested.vpid02);
>  	nested_release_vmcs12(vmx);
>  	if (enable_shadow_vmcs)
> -		free_vmcs(vmx->nested.current_shadow_vmcs);
> +		free_loaded_vmcs(&vmx->nested.current_shadow_vmcs);
>  	/* Unpin physical memory we referred to in current vmcs02 */
>  	if (vmx->nested.apic_access_page) {
>  		nested_release_page(vmx->nested.apic_access_page);
> @@ -7135,7 +7167,7 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
>  	int i;
>  	unsigned long field;
>  	u64 field_value;
> -	struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs;
> +	struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs.vmcs;
>  	const unsigned long *fields = shadow_read_write_fields;
>  	const int num_fields = max_shadow_read_write_fields;
>
> @@ -7184,7 +7216,7 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
>  	int i, q;
>  	unsigned long field;
>  	u64 field_value = 0;
> -	struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs;
> +	struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs.vmcs;
>
>  	vmcs_load(shadow_vmcs);
>
> @@ -7364,10 +7396,11 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
>  		vmx->nested.current_vmcs12 = new_vmcs12;
>  		vmx->nested.current_vmcs12_page = page;
>  		if (enable_shadow_vmcs) {
> +			struct vmcs *shadow_vmcs;
> +			shadow_vmcs = vmx->nested.current_shadow_vmcs.vmcs;

Nit:

	struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs.vmcs;

Bandan

>  			vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
>  				      SECONDARY_EXEC_SHADOW_VMCS);
> -			vmcs_write64(VMCS_LINK_POINTER,
> -				     __pa(vmx->nested.current_shadow_vmcs));
> +			vmcs_write64(VMCS_LINK_POINTER, __pa(shadow_vmcs));
>  			vmx->nested.sync_shadow_vmcs = true;
>  		}
>  	}
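One more note for completeness: the free_nested() hunk relies on
free_loaded_vmcs() doing the VMCLEAR on the owning cpu and unlinking from
the per-cpu list before the page is released. From memory it is roughly
this (again, quoted for reference, not part of this patch):

	static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
	{
		if (!loaded_vmcs->vmcs)
			return;
		loaded_vmcs_clear(loaded_vmcs);	/* IPI the owning cpu, list_del */
		free_vmcs(loaded_vmcs->vmcs);
		loaded_vmcs->vmcs = NULL;
	}

The NULL check should also make the call harmless if the shadow vmcs was
never set up.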