RE: [PATCH 07/31] nVMX: Introduce vmcs02: VMCS used to run L2

"Tian, Kevin" <kevin.tian@xxxxxxxxx> · Fri, 20 May 2011 16:04:39 +0800

> From: Nadav Har'El
> Sent: Tuesday, May 17, 2011 3:48 AM
> 
> We saw in a previous patch that L1 controls its L2 guest with a vmcs12.
> L0 needs to create a real VMCS for running L2. We call that "vmcs02".
> A later patch will contain the code, prepare_vmcs02(), for filling the vmcs02
> fields. This patch only contains code for allocating vmcs02.
> 
> In this version, prepare_vmcs02() sets *all* of vmcs02's fields each time we
> enter from L1 to L2, so keeping just one vmcs02 for the vcpu is enough: It can
> be reused even when L1 runs multiple L2 guests. However, in future versions
> we'll probably want to add an optimization where vmcs02 fields that rarely
> change will not be set each time. For that, we may want to keep around several
> vmcs02s of L2 guests that have recently run, so that potentially we could run
> these L2s again more quickly because less vmwrites to vmcs02 will be needed.

That would be a neat enhancement and should yield an obvious improvement.
Possibly we can maintain the vmcs02 pool along with L1's VMCLEAR operations,
which is similar to the hardware behavior with regard to the cleared and
launched states.

> 
> This patch adds to each vcpu a vmcs02 pool, vmx->nested.vmcs02_pool,
> which remembers the vmcs02s last used to run up to VMCS02_POOL_SIZE L2s.
> As explained above, in the current version we choose VMCS02_POOL_SIZE=1,
> I.e., one vmcs02 is allocated (and loaded onto the processor), and it is
> reused to enter any L2 guest. In the future, when prepare_vmcs02() is
> optimized not to set all fields every time, VMCS02_POOL_SIZE should be
> increased.
> 
> Signed-off-by: Nadav Har'El <nyh@xxxxxxxxxx>
> ---
>  arch/x86/kvm/vmx.c |  139
> +++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 139 insertions(+)
> 
> --- .before/arch/x86/kvm/vmx.c	2011-05-16 22:36:47.000000000 +0300
> +++ .after/arch/x86/kvm/vmx.c	2011-05-16 22:36:47.000000000 +0300
> @@ -117,6 +117,7 @@ static int ple_window = KVM_VMX_DEFAULT_
>  module_param(ple_window, int, S_IRUGO);
> 
>  #define NR_AUTOLOAD_MSRS 1
> +#define VMCS02_POOL_SIZE 1
> 
>  struct vmcs {
>  	u32 revision_id;
> @@ -166,6 +167,30 @@ struct __packed vmcs12 {
>  #define VMCS12_SIZE 0x1000
> 
>  /*
> + * When we temporarily switch a vcpu's VMCS (e.g., stop using an L1's VMCS
> + * while we use L2's VMCS), and we wish to save the previous VMCS, we must
> also
> + * remember on which CPU it was last loaded (vcpu->cpu), so when we return
> to
> + * using this VMCS we'll know if we're now running on a different CPU and
> need
> + * to clear the VMCS on the old CPU, and load it on the new one. Additionally,
> + * we need to remember whether this VMCS was launched (vmx->launched),
> so when
> + * we return to it we know if to VMLAUNCH or to VMRESUME it (we cannot
> deduce
> + * this from other state, because it's possible that this VMCS had once been
> + * launched, but has since been cleared after a CPU switch).
> + */
> +struct saved_vmcs {
> +	struct vmcs *vmcs;
> +	int cpu;
> +	int launched;
> +};

"saved" looks a bit misleading here. It's simply a list of all the active
vmcs02s tracked by KVM, isn't it?

> +
> +/* Used to remember the last vmcs02 used for some recently used vmcs12s
> */
> +struct vmcs02_list {
> +	struct list_head list;
> +	gpa_t vmcs12_addr;

Please use the name 'vmptr' uniformly, as in the nested_vmx structure:
 /* The guest-physical address of the current VMCS L1 keeps for L2 */
	gpa_t current_vmptr;
	/* The host-usable pointer to the above */
	struct page *current_vmcs12_page;
	struct vmcs12 *current_vmcs12;

You should keep a consistent meaning for vmcs12, which refers only to the
arch-neutral state interpreted by KVM.

> +	struct saved_vmcs vmcs02;
> +};
> +
> +/*
>   * The nested_vmx structure is part of vcpu_vmx, and holds information we
> need
>   * for correct emulation of VMX (i.e., nested VMX) on this vcpu.
>   */
> @@ -178,6 +203,10 @@ struct nested_vmx {
>  	/* The host-usable pointer to the above */
>  	struct page *current_vmcs12_page;
>  	struct vmcs12 *current_vmcs12;
> +
> +	/* vmcs02_list cache of VMCSs recently used to run L2 guests */
> +	struct list_head vmcs02_pool;
> +	int vmcs02_num;
>  };
> 
>  struct vcpu_vmx {
> @@ -4200,6 +4229,111 @@ static int handle_invalid_op(struct kvm_
>  }
> 
>  /*
> + * To run an L2 guest, we need a vmcs02 based the L1-specified vmcs12.
> + * We could reuse a single VMCS for all the L2 guests, but we also want the
> + * option to allocate a separate vmcs02 for each separate loaded vmcs12 -
> this
> + * allows keeping them loaded on the processor, and in the future will allow
> + * optimizations where prepare_vmcs02 doesn't need to set all the fields on
> + * every entry if they never change.
> + * So we keep, in vmx->nested.vmcs02_pool, a cache of size
> VMCS02_POOL_SIZE
> + * (>=0) with a vmcs02 for each recently loaded vmcs12s, most recent first.
> + *
> + * The following functions allocate and free a vmcs02 in this pool.
> + */
> +
> +static void __nested_free_saved_vmcs(void *arg)
> +{
> +	struct saved_vmcs *saved_vmcs = arg;
> +
> +	vmcs_clear(saved_vmcs->vmcs);
> +	if (per_cpu(current_vmcs, saved_vmcs->cpu) == saved_vmcs->vmcs)
> +		per_cpu(current_vmcs, saved_vmcs->cpu) = NULL;
> +}
> +
> +/*
> + * Free a VMCS, but before that VMCLEAR it on the CPU where it was last
> loaded
> + * (the necessary information is in the saved_vmcs structure).
> + * See also vcpu_clear() (with different parameters and side-effects)
> + */
> +static void nested_free_saved_vmcs(struct vcpu_vmx *vmx,
> +		struct saved_vmcs *saved_vmcs)
> +{
> +	if (saved_vmcs->cpu != -1)
> +		smp_call_function_single(saved_vmcs->cpu,
> +				__nested_free_saved_vmcs, saved_vmcs, 1);
> +
> +	free_vmcs(saved_vmcs->vmcs);
> +}
> +
> +/* Free and remove from pool a vmcs02 saved for a vmcs12 (if there is one) */
> +static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr)
> +{
> +	struct vmcs02_list *item;
> +	list_for_each_entry(item, &vmx->nested.vmcs02_pool, list)
> +		if (item->vmcs12_addr == vmptr) {
> +			nested_free_saved_vmcs(vmx, &item->vmcs02);
> +			list_del(&item->list);
> +			kfree(item);
> +			vmx->nested.vmcs02_num--;
> +			return;
> +		}
> +}
> +
> +/*
> + * Free all VMCSs saved for this vcpu, except the actual vmx->vmcs.
> + * These include the VMCSs in vmcs02_pool (except the one currently used,
> + * if running L2), and saved_vmcs01 when running L2.
> + */
> +static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx)
> +{
> +	struct vmcs02_list *item, *n;
> +	list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) {
> +		if (vmx->vmcs != item->vmcs02.vmcs)
> +			nested_free_saved_vmcs(vmx, &item->vmcs02);
> +		list_del(&item->list);
> +		kfree(item);
> +	}
> +	vmx->nested.vmcs02_num = 0;
> +}
> +
> +/* Get a vmcs02 for the current vmcs12. */
> +static struct saved_vmcs *nested_get_current_vmcs02(struct vcpu_vmx
> *vmx)
> +{
> +	struct vmcs02_list *item;
> +	list_for_each_entry(item, &vmx->nested.vmcs02_pool, list)
> +		if (item->vmcs12_addr == vmx->nested.current_vmptr) {
> +			list_move(&item->list, &vmx->nested.vmcs02_pool);
> +			return &item->vmcs02;
> +		}
> +
> +	if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) {
> +		/* Recycle the least recently used VMCS. */
> +		item = list_entry(vmx->nested.vmcs02_pool.prev,
> +			struct vmcs02_list, list);
> +		item->vmcs12_addr = vmx->nested.current_vmptr;
> +		list_move(&item->list, &vmx->nested.vmcs02_pool);
> +		return &item->vmcs02;
> +	}
> +
> +	/* Create a new vmcs02 */
> +	item = (struct vmcs02_list *)
> +		kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
> +	if (!item)
> +		return NULL;
> +	item->vmcs02.vmcs = alloc_vmcs();
> +	if (!item->vmcs02.vmcs) {
> +		kfree(item);
> +		return NULL;
> +	}
> +	item->vmcs12_addr = vmx->nested.current_vmptr;
> +	item->vmcs02.cpu = -1;
> +	item->vmcs02.launched = 0;
> +	list_add(&(item->list), &(vmx->nested.vmcs02_pool));
> +	vmx->nested.vmcs02_num++;
> +	return &item->vmcs02;
> +}
> +
> +/*
>   * Emulate the VMXON instruction.
>   * Currently, we just remember that VMX is active, and do not save or even
>   * inspect the argument to VMXON (the so-called "VMXON pointer") because
> we
> @@ -4235,6 +4369,9 @@ static int handle_vmon(struct kvm_vcpu *
>  		return 1;
>  	}
> 
> +	INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool));
> +	vmx->nested.vmcs02_num = 0;
> +
>  	vmx->nested.vmxon = true;
> 
>  	skip_emulated_instruction(vcpu);
> @@ -4286,6 +4423,8 @@ static void free_nested(struct vcpu_vmx
>  		vmx->nested.current_vmptr = -1ull;
>  		vmx->nested.current_vmcs12 = NULL;
>  	}
> +
> +	nested_free_all_saved_vmcss(vmx);
>  }
> 
>  /* Emulate the VMXOFF instruction */
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html