> From: Nadav Har'El
> Sent: Tuesday, May 24, 2011 8:26 PM
>
> Hi Avi, here is an updated version of the loaded_vmcs patch which you asked me
> to send before the rest of the nvmx patches.
>
> Please let me know when you'd like me to send you an updated version of
> the rest of the patches.
>
> ----------------------------------------------------------------------------
> Subject: [PATCH 01/31] nVMX: Keep list of loaded VMCSs, instead of vcpus.
>
> In VMX, before we bring down a CPU we must VMCLEAR all VMCSs loaded on it
> because (at least in theory) the processor might not have written all of its
> content back to memory. Since a patch from June 26, 2008, this is done using
> a per-cpu "vcpus_on_cpu" linked list of vcpus loaded on each CPU.
>
> The problem is that with nested VMX, we no longer have the concept of a
> vcpu being loaded on a cpu: A vcpu has multiple VMCSs (one for L1, a pool for
> L2s), and each of those may have been last loaded on a different cpu.
>
> So instead of linking the vcpus, we link the VMCSs, using a new structure
> loaded_vmcs. This structure contains the VMCS, and the information pertaining
> to its loading on a specific cpu (namely, the cpu number, and whether it
> was already launched on this cpu once). In nested VMX we will also use the
> same structure to hold L2 VMCSs, and vmx->loaded_vmcs is a pointer to the
> currently active VMCS.
>
> Signed-off-by: Nadav Har'El <nyh@xxxxxxxxxx>

Acked-by: Kevin Tian <kevin.tian@xxxxxxxxx>

Thanks
Kevin

> ---
>  arch/x86/kvm/vmx.c |  150 ++++++++++++++++++++++++-------------------
>  1 file changed, 86 insertions(+), 64 deletions(-)
>
> --- .before/arch/x86/kvm/vmx.c	2011-05-24 15:12:22.000000000 +0300
> +++ .after/arch/x86/kvm/vmx.c	2011-05-24 15:12:22.000000000 +0300
> @@ -116,6 +116,18 @@ struct vmcs {
>  	char data[0];
>  };
>
> +/*
> + * Track a VMCS that may be loaded on a certain CPU. If it is (cpu!=-1), also
> + * remember whether it was VMLAUNCHed, and maintain a linked list of all VMCSs
> + * loaded on this CPU (so we can clear them if the CPU goes down).
> + */
> +struct loaded_vmcs {
> +	struct vmcs *vmcs;
> +	int cpu;
> +	int launched;
> +	struct list_head loaded_vmcss_on_cpu_link;
> +};
> +
>  struct shared_msr_entry {
>  	unsigned index;
>  	u64 data;
> @@ -124,9 +136,7 @@ struct shared_msr_entry {
>
>  struct vcpu_vmx {
>  	struct kvm_vcpu       vcpu;
> -	struct list_head      local_vcpus_link;
>  	unsigned long         host_rsp;
> -	int                   launched;
>  	u8                    fail;
>  	u8                    cpl;
>  	bool                  nmi_known_unmasked;
> @@ -140,7 +150,14 @@ struct vcpu_vmx {
>  	u64		      msr_host_kernel_gs_base;
>  	u64		      msr_guest_kernel_gs_base;
>  #endif
> -	struct vmcs *vmcs;
> +	/*
> +	 * loaded_vmcs points to the VMCS currently used in this vcpu. For a
> +	 * non-nested (L1) guest, it always points to vmcs01. For a nested
> +	 * guest (L2), it points to a different VMCS.
> +	 */
> +	struct loaded_vmcs    vmcs01;
> +	struct loaded_vmcs   *loaded_vmcs;
> +	bool                  __launched; /* temporary, used in vmx_vcpu_run */
>  	struct msr_autoload {
>  		unsigned nr;
>  		struct vmx_msr_entry guest[NR_AUTOLOAD_MSRS];
> @@ -200,7 +217,11 @@ static int vmx_set_tss_addr(struct kvm *
>
>  static DEFINE_PER_CPU(struct vmcs *, vmxarea);
>  static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
> -static DEFINE_PER_CPU(struct list_head, vcpus_on_cpu);
> +/*
> + * We maintain a per-CPU linked-list of VMCS loaded on that CPU. This is needed
> + * when a CPU is brought down, and we need to VMCLEAR all VMCSs loaded on it.
> + */
> +static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
>  static DEFINE_PER_CPU(struct desc_ptr, host_gdt);
>
>  static unsigned long *vmx_io_bitmap_a;
> @@ -501,6 +522,13 @@ static void vmcs_clear(struct vmcs *vmcs
>  		       vmcs, phys_addr);
>  }
>
> +static inline void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs)
> +{
> +	vmcs_clear(loaded_vmcs->vmcs);
> +	loaded_vmcs->cpu = -1;
> +	loaded_vmcs->launched = 0;
> +}
> +
>  static void vmcs_load(struct vmcs *vmcs)
>  {
>  	u64 phys_addr = __pa(vmcs);
> @@ -514,25 +542,24 @@ static void vmcs_load(struct vmcs *vmcs)
>  		       vmcs, phys_addr);
>  }
>
> -static void __vcpu_clear(void *arg)
> +static void __loaded_vmcs_clear(void *arg)
>  {
> -	struct vcpu_vmx *vmx = arg;
> +	struct loaded_vmcs *loaded_vmcs = arg;
>  	int cpu = raw_smp_processor_id();
>
> -	if (vmx->vcpu.cpu == cpu)
> -		vmcs_clear(vmx->vmcs);
> -	if (per_cpu(current_vmcs, cpu) == vmx->vmcs)
> +	if (loaded_vmcs->cpu != cpu)
> +		return; /* vcpu migration can race with cpu offline */
> +	if (per_cpu(current_vmcs, cpu) == loaded_vmcs->vmcs)
>  		per_cpu(current_vmcs, cpu) = NULL;
> -	list_del(&vmx->local_vcpus_link);
> -	vmx->vcpu.cpu = -1;
> -	vmx->launched = 0;
> +	list_del(&loaded_vmcs->loaded_vmcss_on_cpu_link);
> +	loaded_vmcs_init(loaded_vmcs);
>  }
>
> -static void vcpu_clear(struct vcpu_vmx *vmx)
> +static void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs)
>  {
> -	if (vmx->vcpu.cpu == -1)
> -		return;
> -	smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear, vmx, 1);
> +	if (loaded_vmcs->cpu != -1)
> +		smp_call_function_single(
> +			loaded_vmcs->cpu, __loaded_vmcs_clear, loaded_vmcs, 1);
>  }
>
>  static inline void vpid_sync_vcpu_single(struct vcpu_vmx *vmx)
> @@ -971,22 +998,22 @@ static void vmx_vcpu_load(struct kvm_vcp
>
>  	if (!vmm_exclusive)
>  		kvm_cpu_vmxon(phys_addr);
> -	else if (vcpu->cpu != cpu)
> -		vcpu_clear(vmx);
> +	else if (vmx->loaded_vmcs->cpu != cpu)
> +		loaded_vmcs_clear(vmx->loaded_vmcs);
>
> -	if (per_cpu(current_vmcs, cpu) != vmx->vmcs) {
> -		per_cpu(current_vmcs, cpu) = vmx->vmcs;
> -		vmcs_load(vmx->vmcs);
> +	if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
> +		per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
> +		vmcs_load(vmx->loaded_vmcs->vmcs);
>  	}
>
> -	if (vcpu->cpu != cpu) {
> +	if (vmx->loaded_vmcs->cpu != cpu) {
>  		struct desc_ptr *gdt = &__get_cpu_var(host_gdt);
>  		unsigned long sysenter_esp;
>
>  		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
>  		local_irq_disable();
> -		list_add(&vmx->local_vcpus_link,
> -			 &per_cpu(vcpus_on_cpu, cpu));
> +		list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link,
> +			 &per_cpu(loaded_vmcss_on_cpu, cpu));
>  		local_irq_enable();
>
>  		/*
> @@ -998,6 +1025,7 @@ static void vmx_vcpu_load(struct kvm_vcp
>
>  		rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
>  		vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
> +		vmx->loaded_vmcs->cpu = cpu;
>  	}
>  }
>
> @@ -1005,7 +1033,8 @@ static void vmx_vcpu_put(struct kvm_vcpu
>  {
>  	__vmx_load_host_state(to_vmx(vcpu));
>  	if (!vmm_exclusive) {
> -		__vcpu_clear(to_vmx(vcpu));
> +		__loaded_vmcs_clear(to_vmx(vcpu)->loaded_vmcs);
> +		vcpu->cpu = -1;
>  		kvm_cpu_vmxoff();
>  	}
>  }
> @@ -1469,7 +1498,7 @@ static int hardware_enable(void *garbage
>  	if (read_cr4() & X86_CR4_VMXE)
>  		return -EBUSY;
>
> -	INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu));
> +	INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
>  	rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
>
>  	test_bits = FEATURE_CONTROL_LOCKED;
> @@ -1493,14 +1522,14 @@ static int hardware_enable(void *garbage
>  	return 0;
>  }
>
> -static void vmclear_local_vcpus(void)
> +static void vmclear_local_loaded_vmcss(void)
>  {
>  	int cpu = raw_smp_processor_id();
> -	struct vcpu_vmx *vmx, *n;
> +	struct loaded_vmcs *v, *n;
>
> -	list_for_each_entry_safe(vmx, n, &per_cpu(vcpus_on_cpu, cpu),
> -				 local_vcpus_link)
> -		__vcpu_clear(vmx);
> +	list_for_each_entry_safe(v, n, &per_cpu(loaded_vmcss_on_cpu, cpu),
> +				 loaded_vmcss_on_cpu_link)
> +		__loaded_vmcs_clear(v);
>  }
>
>
> @@ -1515,7 +1544,7 @@ static void kvm_cpu_vmxoff(void)
>  static void hardware_disable(void *garbage)
>  {
>  	if (vmm_exclusive) {
> -		vmclear_local_vcpus();
> +		vmclear_local_loaded_vmcss();
>  		kvm_cpu_vmxoff();
>  	}
>  	write_cr4(read_cr4() & ~X86_CR4_VMXE);
> @@ -1696,6 +1725,18 @@ static void free_vmcs(struct vmcs *vmcs)
>  	free_pages((unsigned long)vmcs, vmcs_config.order);
>  }
>
> +/*
> + * Free a VMCS, but before that VMCLEAR it on the CPU where it was last loaded
> + */
> +static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
> +{
> +	if (!loaded_vmcs->vmcs)
> +		return;
> +	loaded_vmcs_clear(loaded_vmcs);
> +	free_vmcs(loaded_vmcs->vmcs);
> +	loaded_vmcs->vmcs = NULL;
> +}
> +
>  static void free_kvm_area(void)
>  {
>  	int cpu;
> @@ -4166,6 +4207,7 @@ static void __noclone vmx_vcpu_run(struc
>  	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
>  		vmx_set_interrupt_shadow(vcpu, 0);
>
> +	vmx->__launched = vmx->loaded_vmcs->launched;
>  	asm(
>  		/* Store host registers */
>  		"push %%"R"dx; push %%"R"bp;"
> @@ -4236,7 +4278,7 @@ static void __noclone vmx_vcpu_run(struc
>  		"pop %%"R"bp; pop %%"R"dx \n\t"
>  		"setbe %c[fail](%0) \n\t"
>  	      : : "c"(vmx), "d"((unsigned long)HOST_RSP),
> -		[launched]"i"(offsetof(struct vcpu_vmx, launched)),
> +		[launched]"i"(offsetof(struct vcpu_vmx, __launched)),
>  		[fail]"i"(offsetof(struct vcpu_vmx, fail)),
>  		[host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)),
>  		[rax]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RAX])),
> @@ -4276,7 +4318,7 @@ static void __noclone vmx_vcpu_run(struc
>  	vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
>
>  	asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
> -	vmx->launched = 1;
> +	vmx->loaded_vmcs->launched = 1;
>
>  	vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
>
> @@ -4288,41 +4330,17 @@ static void __noclone vmx_vcpu_run(struc
>  #undef R
>  #undef Q
>
> -static void vmx_free_vmcs(struct kvm_vcpu *vcpu)
> -{
> -	struct vcpu_vmx *vmx = to_vmx(vcpu);
> -
> -	if (vmx->vmcs) {
> -		vcpu_clear(vmx);
> -		free_vmcs(vmx->vmcs);
> -		vmx->vmcs = NULL;
> -	}
> -}
> -
>  static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
>  {
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
>
>  	free_vpid(vmx);
> -	vmx_free_vmcs(vcpu);
> +	free_loaded_vmcs(vmx->loaded_vmcs);
>  	kfree(vmx->guest_msrs);
>  	kvm_vcpu_uninit(vcpu);
>  	kmem_cache_free(kvm_vcpu_cache, vmx);
>  }
>
> -static inline void vmcs_init(struct vmcs *vmcs)
> -{
> -	u64 phys_addr = __pa(per_cpu(vmxarea, raw_smp_processor_id()));
> -
> -	if (!vmm_exclusive)
> -		kvm_cpu_vmxon(phys_addr);
> -
> -	vmcs_clear(vmcs);
> -
> -	if (!vmm_exclusive)
> -		kvm_cpu_vmxoff();
> -}
> -
>  static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
>  {
>  	int err;
> @@ -4344,11 +4362,15 @@ static struct kvm_vcpu *vmx_create_vcpu(
>  		goto uninit_vcpu;
>  	}
>
> -	vmx->vmcs = alloc_vmcs();
> -	if (!vmx->vmcs)
> +	vmx->loaded_vmcs = &vmx->vmcs01;
> +	vmx->loaded_vmcs->vmcs = alloc_vmcs();
> +	if (!vmx->loaded_vmcs->vmcs)
>  		goto free_msrs;
> -
> -	vmcs_init(vmx->vmcs);
> +	if (!vmm_exclusive)
> +		kvm_cpu_vmxon(__pa(per_cpu(vmxarea, raw_smp_processor_id())));
> +	loaded_vmcs_init(vmx->loaded_vmcs);
> +	if (!vmm_exclusive)
> +		kvm_cpu_vmxoff();
>
>  	cpu = get_cpu();
>  	vmx_vcpu_load(&vmx->vcpu, cpu);
> @@ -4377,7 +4399,7 @@ static struct kvm_vcpu *vmx_create_vcpu(
>  	return &vmx->vcpu;
>
>  free_vmcs:
> -	free_vmcs(vmx->vmcs);
> +	free_vmcs(vmx->loaded_vmcs->vmcs);
>  free_msrs:
>  	kfree(vmx->guest_msrs);
>  uninit_vcpu:
>
> --
> Nadav Har'El                        |      Tuesday, May 24 2011, 20 Iyyar 5771
> nyh@xxxxxxxxxxxxxxxxxxx             |-----------------------------------------
> Phone +972-523-790466, ICQ 13349191 |I am thinking about a new signature. Stay
> http://nadav.harel.org.il           |tuned.
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at http://vger.kernel.org/majordomo-info.html
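For readers who want to see the bookkeeping in isolation, here is a minimal, self-contained
userspace sketch of the pattern the patch introduces: each CPU keeps a list of the contexts
last loaded on it, and taking a CPU down walks that list and clears every entry. This is only
an illustration under assumed names (fake_vmcs, loaded_ctx, ctx_load, cpu_offline and
loaded_on_cpu are hypothetical stand-ins, and a plain singly-linked list replaces the kernel's
struct list_head); it is not the kernel code above.

/* Illustrative userspace model of the per-CPU loaded-VMCS list (assumed names). */
#include <stdio.h>

#define NR_CPUS 4

struct fake_vmcs { int id; };

/* Stand-in for struct loaded_vmcs: a VMCS plus the CPU it was last loaded on. */
struct loaded_ctx {
	struct fake_vmcs *vmcs;
	int cpu;                  /* -1 means "not loaded on any CPU" */
	int launched;
	struct loaded_ctx *next;  /* simplified per-CPU list linkage */
};

/* One list head per CPU, analogous to per_cpu(loaded_vmcss_on_cpu). */
static struct loaded_ctx *loaded_on_cpu[NR_CPUS];

/* "Loading" a context on a CPU records the CPU and links it onto that CPU's list. */
static void ctx_load(struct loaded_ctx *c, int cpu)
{
	if (c->cpu == cpu)
		return;
	c->cpu = cpu;
	c->next = loaded_on_cpu[cpu];
	loaded_on_cpu[cpu] = c;
}

/* "CPU offline": clear every context last loaded on that CPU. */
static void cpu_offline(int cpu)
{
	struct loaded_ctx *c = loaded_on_cpu[cpu];

	while (c) {
		struct loaded_ctx *next = c->next;
		printf("clearing vmcs %d (was on cpu %d)\n", c->vmcs->id, c->cpu);
		c->cpu = -1;       /* mirrors what loaded_vmcs_init() does */
		c->launched = 0;
		c->next = NULL;
		c = next;
	}
	loaded_on_cpu[cpu] = NULL;
}

int main(void)
{
	struct fake_vmcs v1 = { 1 }, v2 = { 2 };
	struct loaded_ctx a = { &v1, -1, 0, NULL };
	struct loaded_ctx b = { &v2, -1, 0, NULL };

	/* Two VMCSs of the same vcpu (think vmcs01 and an L2 VMCS) on one CPU. */
	ctx_load(&a, 2);
	ctx_load(&b, 2);
	cpu_offline(2);
	return 0;
}

The real patch differs in the details that matter inside the kernel: it uses per-CPU variables,
struct list_head linkage, and an IPI via smp_call_function_single() so that the VMCLEAR runs on
the CPU that last held the VMCS. But the list-per-CPU bookkeeping is the same idea.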