In this patch we add a list of L0 (hardware) VMCSs, which we'll use to hold a hardware VMCS for each active L1 VMCS (i.e., for each L2 guest). We call each of these L0 VMCSs a "vmcs02", as it is the VMCS that L0 uses to run its nested guest L2. Signed-off-by: Nadav Har'El <nyh@xxxxxxxxxx> --- --- .before/arch/x86/kvm/vmx.c 2010-06-13 15:01:29.000000000 +0300 +++ .after/arch/x86/kvm/vmx.c 2010-06-13 15:01:29.000000000 +0300 @@ -140,6 +140,12 @@ struct __attribute__ ((__packed__)) vmcs u32 abort; }; +struct vmcs_list { + struct list_head list; + gpa_t vmcs_addr; + struct vmcs *l2_vmcs; +}; + /* The nested_vmx structure is part of vcpu_vmx, and holds information we need * for correct emulation of VMX (i.e., nested VMX) on this vcpu. For example, * the current VMCS set by L1, a list of the VMCSs used to run the active @@ -153,6 +159,10 @@ struct nested_vmx { gpa_t current_vmptr; /* The host-usable pointer to the above. Set by nested_map_current() */ struct vmcs12 *current_l2_page; + + /* list of real (hardware) VMCS, one for each L2 guest of L1 */ + struct list_head l2_vmcs_list; /* a vmcs_list */ + int l2_vmcs_num; }; struct vcpu_vmx { @@ -1754,6 +1764,84 @@ static void free_vmcs(struct vmcs *vmcs) free_pages((unsigned long)vmcs, vmcs_config.order); } +static struct vmcs *nested_get_current_vmcs(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct vmcs_list *list_item, *n; + + list_for_each_entry_safe(list_item, n, &vmx->nested.l2_vmcs_list, list) + if (list_item->vmcs_addr == vmx->nested.current_vmptr) + return list_item->l2_vmcs; + + return NULL; +} + +/* Allocate an L0 VMCS (vmcs02) for the current L1 VMCS (vmcs12), if one + * does not already exist. The allocation is done in L0 memory, so to avoid + * denial-of-service attack by guests, we limit the number of concurrently- + * allocated vmcss. A well-behaving L1 will VMCLEAR unused vmcs12s and not + * trigger this limit. + */ +static const int NESTED_MAX_VMCS = 256; +static int nested_create_current_vmcs(struct kvm_vcpu *vcpu) +{ + struct vmcs_list *new_l2_guest; + struct vmcs *l2_vmcs; + + if (nested_get_current_vmcs(vcpu)) + return 0; /* nothing to do - we already have a VMCS */ + + if (to_vmx(vcpu)->nested.l2_vmcs_num >= NESTED_MAX_VMCS) + return -ENOMEM; + + new_l2_guest = (struct vmcs_list *) + kmalloc(sizeof(struct vmcs_list), GFP_KERNEL); + if (!new_l2_guest) + return -ENOMEM; + + l2_vmcs = alloc_vmcs(); + if (!l2_vmcs) { + kfree(new_l2_guest); + return -ENOMEM; + } + + new_l2_guest->vmcs_addr = to_vmx(vcpu)->nested.current_vmptr; + new_l2_guest->l2_vmcs = l2_vmcs; + list_add(&(new_l2_guest->list), &(to_vmx(vcpu)->nested.l2_vmcs_list)); + to_vmx(vcpu)->nested.l2_vmcs_num++; + return 0; +} + +/* Free the current L2 VMCS, and remove it from l2_vmcs_list */ +static void nested_free_current_vmcs(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct vmcs_list *list_item, *n; + + list_for_each_entry_safe(list_item, n, &vmx->nested.l2_vmcs_list, list) + if (list_item->vmcs_addr == vmx->nested.current_vmptr) { + free_vmcs(list_item->l2_vmcs); + list_del(&(list_item->list)); + kfree(list_item); + vmx->nested.l2_vmcs_num--; + return; + } +} + +static void free_l1_state(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct vmcs_list *list_item, *n; + + list_for_each_entry_safe(list_item, n, + &vmx->nested.l2_vmcs_list, list) { + free_vmcs(list_item->l2_vmcs); + list_del(&(list_item->list)); + kfree(list_item); + } + vmx->nested.l2_vmcs_num = 0; +} + static void free_kvm_area(void) { int cpu; @@ -3606,6 +3694,9 @@ static int handle_vmon(struct kvm_vcpu * return 1; } + INIT_LIST_HEAD(&(vmx->nested.l2_vmcs_list)); + vmx->nested.l2_vmcs_num = 0; + vmx->nested.vmxon = 1; skip_emulated_instruction(vcpu); @@ -3650,6 +3741,8 @@ static int handle_vmoff(struct kvm_vcpu to_vmx(vcpu)->nested.vmxon = 0; + free_l1_state(vcpu); + skip_emulated_instruction(vcpu); return 1; } @@ -4402,6 +4495,8 @@ static void vmx_free_vcpu(struct kvm_vcp struct vcpu_vmx *vmx = to_vmx(vcpu); free_vpid(vmx); + if (vmx->nested.vmxon) + free_l1_state(vcpu); vmx_free_vmcs(vcpu); kfree(vmx->guest_msrs); kvm_vcpu_uninit(vcpu); -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html