> From: Tian, Kevin > Sent: Friday, May 20, 2011 4:05 PM > > > From: Nadav Har'El > > Sent: Tuesday, May 17, 2011 3:48 AM > > > > We saw in a previous patch that L1 controls its L2 guest with a vcms12. > > L0 needs to create a real VMCS for running L2. We call that "vmcs02". > > A later patch will contain the code, prepare_vmcs02(), for filling the vmcs02 > > fields. This patch only contains code for allocating vmcs02. > > > > In this version, prepare_vmcs02() sets *all* of vmcs02's fields each time we > > enter from L1 to L2, so keeping just one vmcs02 for the vcpu is enough: It can > > be reused even when L1 runs multiple L2 guests. However, in future versions > > we'll probably want to add an optimization where vmcs02 fields that rarely > > change will not be set each time. For that, we may want to keep around > several > > vmcs02s of L2 guests that have recently run, so that potentially we could run > > these L2s again more quickly because less vmwrites to vmcs02 will be > needed. > > That would be a neat enhancement and should have an obvious improvement. > Possibly we can maintain the vmcs02 pool along with L1 VMCLEAR ops, which > is similar to the hardware behavior regarding to cleared and launched state. > > > > > This patch adds to each vcpu a vmcs02 pool, vmx->nested.vmcs02_pool, > > which remembers the vmcs02s last used to run up to VMCS02_POOL_SIZE > L2s. > > As explained above, in the current version we choose VMCS02_POOL_SIZE=1, > > I.e., one vmcs02 is allocated (and loaded onto the processor), and it is > > reused to enter any L2 guest. In the future, when prepare_vmcs02() is > > optimized not to set all fields every time, VMCS02_POOL_SIZE should be > > increased. > > > > Signed-off-by: Nadav Har'El <nyh@xxxxxxxxxx> > > --- > > arch/x86/kvm/vmx.c | 139 > > +++++++++++++++++++++++++++++++++++++++++++ > > 1 file changed, 139 insertions(+) > > > > --- .before/arch/x86/kvm/vmx.c 2011-05-16 22:36:47.000000000 +0300 > > +++ .after/arch/x86/kvm/vmx.c 2011-05-16 22:36:47.000000000 +0300 > > @@ -117,6 +117,7 @@ static int ple_window = KVM_VMX_DEFAULT_ > > module_param(ple_window, int, S_IRUGO); > > > > #define NR_AUTOLOAD_MSRS 1 > > +#define VMCS02_POOL_SIZE 1 > > > > struct vmcs { > > u32 revision_id; > > @@ -166,6 +167,30 @@ struct __packed vmcs12 { > > #define VMCS12_SIZE 0x1000 > > > > /* > > + * When we temporarily switch a vcpu's VMCS (e.g., stop using an L1's > VMCS > > + * while we use L2's VMCS), and we wish to save the previous VMCS, we > must > > also > > + * remember on which CPU it was last loaded (vcpu->cpu), so when we > return > > to > > + * using this VMCS we'll know if we're now running on a different CPU and > > need > > + * to clear the VMCS on the old CPU, and load it on the new one. > Additionally, > > + * we need to remember whether this VMCS was launched (vmx->launched), > > so when > > + * we return to it we know if to VMLAUNCH or to VMRESUME it (we cannot > > deduce > > + * this from other state, because it's possible that this VMCS had once been > > + * launched, but has since been cleared after a CPU switch). > > + */ > > +struct saved_vmcs { > > + struct vmcs *vmcs; > > + int cpu; > > + int launched; > > +}; > > "saved" looks a bit misleading here. It's simply a list of all active vmcs02 > tracked > by kvm, isn't it? > > > + > > +/* Used to remember the last vmcs02 used for some recently used vmcs12s > > */ > > +struct vmcs02_list { > > + struct list_head list; > > + gpa_t vmcs12_addr; > > uniform the name 'vmptr' as nested_vmx strucure: > /* The guest-physical address of the current VMCS L1 keeps for L2 */ > gpa_t current_vmptr; > /* The host-usable pointer to the above */ > struct page *current_vmcs12_page; > struct vmcs12 *current_vmcs12; > > you should keep consistent meaning for vmcs12, which means the arch-neutral > state interpreted by KVM only. > > > + struct saved_vmcs vmcs02; > > +}; > > + > > +/* > > * The nested_vmx structure is part of vcpu_vmx, and holds information we > > need > > * for correct emulation of VMX (i.e., nested VMX) on this vcpu. > > */ > > @@ -178,6 +203,10 @@ struct nested_vmx { > > /* The host-usable pointer to the above */ > > struct page *current_vmcs12_page; > > struct vmcs12 *current_vmcs12; > > + > > + /* vmcs02_list cache of VMCSs recently used to run L2 guests */ > > + struct list_head vmcs02_pool; > > + int vmcs02_num; > > }; > > > > struct vcpu_vmx { > > @@ -4200,6 +4229,111 @@ static int handle_invalid_op(struct kvm_ > > } > > > > /* > > + * To run an L2 guest, we need a vmcs02 based the L1-specified vmcs12. > > + * We could reuse a single VMCS for all the L2 guests, but we also want the > > + * option to allocate a separate vmcs02 for each separate loaded vmcs12 - > > this > > + * allows keeping them loaded on the processor, and in the future will allow > > + * optimizations where prepare_vmcs02 doesn't need to set all the fields on > > + * every entry if they never change. > > + * So we keep, in vmx->nested.vmcs02_pool, a cache of size > > VMCS02_POOL_SIZE > > + * (>=0) with a vmcs02 for each recently loaded vmcs12s, most recent first. > > + * > > + * The following functions allocate and free a vmcs02 in this pool. > > + */ > > + > > +static void __nested_free_saved_vmcs(void *arg) > > +{ > > + struct saved_vmcs *saved_vmcs = arg; > > + > > + vmcs_clear(saved_vmcs->vmcs); > > + if (per_cpu(current_vmcs, saved_vmcs->cpu) == saved_vmcs->vmcs) > > + per_cpu(current_vmcs, saved_vmcs->cpu) = NULL; > > +} > > + > > +/* > > + * Free a VMCS, but before that VMCLEAR it on the CPU where it was last > > loaded > > + * (the necessary information is in the saved_vmcs structure). > > + * See also vcpu_clear() (with different parameters and side-effects) > > + */ > > +static void nested_free_saved_vmcs(struct vcpu_vmx *vmx, > > + struct saved_vmcs *saved_vmcs) > > +{ > > + if (saved_vmcs->cpu != -1) > > + smp_call_function_single(saved_vmcs->cpu, > > + __nested_free_saved_vmcs, saved_vmcs, 1); > > + > > + free_vmcs(saved_vmcs->vmcs); > > +} > > + > > +/* Free and remove from pool a vmcs02 saved for a vmcs12 (if there is one) > */ > > +static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr) > > +{ > > + struct vmcs02_list *item; > > + list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) > > + if (item->vmcs12_addr == vmptr) { > > + nested_free_saved_vmcs(vmx, &item->vmcs02); > > + list_del(&item->list); > > + kfree(item); > > + vmx->nested.vmcs02_num--; > > + return; > > + } > > +} > > + > > +/* > > + * Free all VMCSs saved for this vcpu, except the actual vmx->vmcs. > > + * These include the VMCSs in vmcs02_pool (except the one currently used, > > + * if running L2), and saved_vmcs01 when running L2. > > + */ > > +static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx) > > +{ > > + struct vmcs02_list *item, *n; > > + list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) { > > + if (vmx->vmcs != item->vmcs02.vmcs) > > + nested_free_saved_vmcs(vmx, &item->vmcs02); > > + list_del(&item->list); > > + kfree(item); > > + } > > + vmx->nested.vmcs02_num = 0; > > +} > > + > > +/* Get a vmcs02 for the current vmcs12. */ > > +static struct saved_vmcs *nested_get_current_vmcs02(struct vcpu_vmx > > *vmx) > > +{ > > + struct vmcs02_list *item; > > + list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) > > + if (item->vmcs12_addr == vmx->nested.current_vmptr) { > > + list_move(&item->list, &vmx->nested.vmcs02_pool); > > + return &item->vmcs02; > > + } > > + > > + if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) { > > + /* Recycle the least recently used VMCS. */ > > + item = list_entry(vmx->nested.vmcs02_pool.prev, > > + struct vmcs02_list, list); > > + item->vmcs12_addr = vmx->nested.current_vmptr; > > + list_move(&item->list, &vmx->nested.vmcs02_pool); > > + return &item->vmcs02; btw, shouldn't you clear recycled VMCS and reset 'cpu' and 'launched' fields? Have you tried SMP L2 guest? Thanks Kevin > > + } > > + > > + /* Create a new vmcs02 */ > > + item = (struct vmcs02_list *) > > + kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL); > > + if (!item) > > + return NULL; > > + item->vmcs02.vmcs = alloc_vmcs(); > > + if (!item->vmcs02.vmcs) { > > + kfree(item); > > + return NULL; > > + } > > + item->vmcs12_addr = vmx->nested.current_vmptr; > > + item->vmcs02.cpu = -1; > > + item->vmcs02.launched = 0; > > + list_add(&(item->list), &(vmx->nested.vmcs02_pool)); > > + vmx->nested.vmcs02_num++; > > + return &item->vmcs02; > > +} > > + > > +/* > > * Emulate the VMXON instruction. > > * Currently, we just remember that VMX is active, and do not save or even > > * inspect the argument to VMXON (the so-called "VMXON pointer") > because > > we > > @@ -4235,6 +4369,9 @@ static int handle_vmon(struct kvm_vcpu * > > return 1; > > } > > > > + INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); > > + vmx->nested.vmcs02_num = 0; > > + > > vmx->nested.vmxon = true; > > > > skip_emulated_instruction(vcpu); > > @@ -4286,6 +4423,8 @@ static void free_nested(struct vcpu_vmx > > vmx->nested.current_vmptr = -1ull; > > vmx->nested.current_vmcs12 = NULL; > > } > > + > > + nested_free_all_saved_vmcss(vmx); > > } > > > > /* Emulate the VMXOFF instruction */ > > -- > > To unsubscribe from this list: send the line "unsubscribe kvm" in > > the body of a message to majordomo@xxxxxxxxxxxxxxx > > More majordomo info at http://vger.kernel.org/majordomo-info.html > -- > To unsubscribe from this list: send the line "unsubscribe kvm" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html