We saw in a previous patch that L1 controls its L2 guest with a vmcs12. L0 needs to create a real VMCS for running L2. We call that "vmcs02". A later patch will contain the code, prepare_vmcs02(), for filling the vmcs02 fields. This patch only contains code for allocating vmcs02. In this version, prepare_vmcs02() sets *all* of vmcs02's fields each time we enter from L1 to L2, so keeping just one vmcs02 for the vcpu is enough: it can be reused even when L1 runs multiple L2 guests. However, in future versions we'll probably want to add an optimization where vmcs02 fields that rarely change will not be set each time. For that, we may want to keep around several vmcs02s of L2 guests that have recently run, so that potentially we could run these L2s again more quickly because fewer vmwrites to vmcs02 will be needed. This patch adds to each vcpu a vmcs02 pool, vmx->nested.vmcs02_pool, which remembers the vmcs02s last used to run up to VMCS02_POOL_SIZE L2s. As explained above, in the current version we choose VMCS02_POOL_SIZE=1, i.e., one vmcs02 is allocated (and loaded onto the processor), and it is reused to enter any L2 guest. In the future, when prepare_vmcs02() is optimized not to set all fields every time, VMCS02_POOL_SIZE should be increased. 
Signed-off-by: Nadav Har'El <nyh@xxxxxxxxxx> --- arch/x86/kvm/vmx.c | 134 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) --- .before/arch/x86/kvm/vmx.c 2011-05-08 10:43:18.000000000 +0300 +++ .after/arch/x86/kvm/vmx.c 2011-05-08 10:43:18.000000000 +0300 @@ -117,6 +117,7 @@ static int ple_window = KVM_VMX_DEFAULT_ module_param(ple_window, int, S_IRUGO); #define NR_AUTOLOAD_MSRS 1 +#define VMCS02_POOL_SIZE 1 struct vmcs { u32 revision_id; @@ -166,6 +167,30 @@ struct __packed vmcs12 { #define VMCS12_SIZE 0x1000 /* + * When we temporarily switch a vcpu's VMCS (e.g., stop using an L1's VMCS + * while we use L2's VMCS), and we wish to save the previous VMCS, we must also + * remember on which CPU it was last loaded (vcpu->cpu), so when we return to + * using this VMCS we'll know if we're now running on a different CPU and need + * to clear the VMCS on the old CPU, and load it on the new one. Additionally, + * we need to remember whether this VMCS was launched (vmx->launched), so when + * we return to it we know whether to VMLAUNCH or VMRESUME it (we cannot deduce + * this from other state, because it's possible that this VMCS had once been + * launched, but has since been cleared after a CPU switch). + */ +struct saved_vmcs { + struct vmcs *vmcs; + int cpu; /* CPU it was last loaded on, or -1 */ + int launched; +}; + +/* Pool entry: the vmcs02 last used to run the vmcs12 found at vmcs12_addr */ +struct vmcs02_list { + struct list_head list; + gpa_t vmcs12_addr; + struct saved_vmcs vmcs02; +}; + +/* * The nested_vmx structure is part of vcpu_vmx, and holds information we need * for correct emulation of VMX (i.e., nested VMX) on this vcpu. 
*/ @@ -178,6 +203,10 @@ struct nested_vmx { /* The host-usable pointer to the above */ struct page *current_vmcs12_page; struct vmcs12 *current_vmcs12; + + /* List of vmcs02_list entries: VMCSs recently used to run L2 guests */ + struct list_head vmcs02_pool; + int vmcs02_num; /* number of entries in vmcs02_pool */ }; struct vcpu_vmx { @@ -4155,6 +4184,106 @@ static int handle_invalid_op(struct kvm_ } /* + * To run an L2 guest, we need a vmcs02 based on the L1-specified vmcs12. + * We could reuse a single VMCS for all the L2 guests, but we also want the + * option to allocate a separate vmcs02 for each separate loaded vmcs12 - this + * allows keeping them loaded on the processor, and in the future will allow + * optimizations where prepare_vmcs02 doesn't need to set all the fields on + * every entry if they never change. + * So we keep, in vmx->nested.vmcs02_pool, a cache of size VMCS02_POOL_SIZE + * (>=0) with a vmcs02 for each recently loaded vmcs12, most recent first. + * + * The following functions allocate and free a vmcs02 in this pool. + */ + +static void __nested_free_saved_vmcs(void *arg) +{ + struct saved_vmcs *saved_vmcs = arg; + + vmcs_clear(saved_vmcs->vmcs); + if (per_cpu(current_vmcs, saved_vmcs->cpu) == saved_vmcs->vmcs) + per_cpu(current_vmcs, saved_vmcs->cpu) = NULL; +} + +/* + * Free a VMCS, but before that VMCLEAR it on the CPU where it was last loaded + * (the necessary information is in the saved_vmcs structure). 
+ * See also vcpu_clear() (with different parameters and side-effects). + */ +static void nested_free_saved_vmcs(struct vcpu_vmx *vmx, + struct saved_vmcs *saved_vmcs) +{ + if (saved_vmcs->cpu != -1) + smp_call_function_single(saved_vmcs->cpu, + __nested_free_saved_vmcs, saved_vmcs, 1); + + free_vmcs(saved_vmcs->vmcs); +} + +/* Free and remove from pool a vmcs02 saved for a vmcs12 (if there is one) */ +static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr) +{ + struct vmcs02_list *item; + list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) + if (item->vmcs12_addr == vmptr) { + nested_free_saved_vmcs(vmx, &item->vmcs02); + list_del(&item->list); + kfree(item); + vmx->nested.vmcs02_num--; + return; + } +} + +/* Free all vmcs02 saved for this vcpu */ +static void nested_free_all_vmcs02(struct vcpu_vmx *vmx) +{ + struct vmcs02_list *item, *n; + list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) { + nested_free_saved_vmcs(vmx, &item->vmcs02); + list_del(&item->list); + kfree(item); + } + vmx->nested.vmcs02_num = 0; +} + +/* Get a vmcs02 for the current vmcs12; returns NULL if allocation fails. */ +static struct saved_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx) +{ + struct vmcs02_list *item; + list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) + if (item->vmcs12_addr == vmx->nested.current_vmptr) { + list_move(&item->list, &vmx->nested.vmcs02_pool); + return &item->vmcs02; + } + + if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) { + /* Recycle the least recently used VMCS. 
*/ + item = list_entry(vmx->nested.vmcs02_pool.prev, + struct vmcs02_list, list); + item->vmcs12_addr = vmx->nested.current_vmptr; + list_move(&item->list, &vmx->nested.vmcs02_pool); + return &item->vmcs02; + } + + /* Pool not full: create a new vmcs02 and add it at the list head */ + item = (struct vmcs02_list *) + kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL); + if (!item) + return NULL; + item->vmcs02.vmcs = alloc_vmcs(); + if (!item->vmcs02.vmcs) { + kfree(item); + return NULL; + } + item->vmcs12_addr = vmx->nested.current_vmptr; + item->vmcs02.cpu = -1; + item->vmcs02.launched = 0; + list_add(&(item->list), &(vmx->nested.vmcs02_pool)); + vmx->nested.vmcs02_num++; + return &item->vmcs02; +} + +/* * Emulate the VMXON instruction. * Currently, we just remember that VMX is active, and do not save or even * inspect the argument to VMXON (the so-called "VMXON pointer") because we @@ -4190,6 +4319,9 @@ static int handle_vmon(struct kvm_vcpu * return 1; } + INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); + vmx->nested.vmcs02_num = 0; + vmx->nested.vmxon = true; skip_emulated_instruction(vcpu); @@ -4241,6 +4373,8 @@ static void free_nested(struct vcpu_vmx vmx->nested.current_vmptr = -1ull; vmx->nested.current_vmcs12 = NULL; } + + nested_free_all_vmcs02(vmx); } /* Emulate the VMXOFF instruction */ -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html