Since the L2 guest runs directly on the L0 hypervisor, L0 holds some state that is
needed to safely save/resume L2. The rest of the state is constructed from the
VMCS12, which is stored in L1 hypervisor memory.

NOTE: I am still debugging a VMEntry failure for L2 that occasionally happens
after a loop of save and restore.

Signed-off-by: KarimAllah Ahmed <karahmed@xxxxxxxxx>
---
 arch/x86/include/asm/kvm_host.h |  3 +++
 arch/x86/include/uapi/asm/kvm.h |  7 +++++++
 arch/x86/kvm/vmx.c              | 44 +++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c              | 10 ++++++++++
 include/linux/kvm_host.h        |  2 ++
 include/uapi/linux/kvm.h        |  3 +++
 virt/kvm/kvm_main.c             | 24 ++++++++++++++++++++++
 7 files changed, 93 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 2c8be56..4223ce9 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -966,6 +966,9 @@ struct kvm_x86_ops {
 	void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
 	void (*vcpu_put)(struct kvm_vcpu *vcpu);
 
+	int (*get_nested)(struct kvm_vcpu *vcpu, struct kvm_nested *nested);
+	int (*set_nested)(struct kvm_vcpu *vcpu, struct kvm_nested *nested);
+
 	void (*update_bp_intercept)(struct kvm_vcpu *vcpu);
 	int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
 	int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index b49816d..f1e36ef 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -157,6 +157,13 @@ struct kvm_sregs {
 	__u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64];
 };
 
+struct kvm_nested {
+	__u8 vmxon;
+	__u64 vmxon_ptr;
+	__u64 current_vmptr;
+	__u64 vmcs01_debugctl;
+};
+
 /* for KVM_GET_FPU and KVM_SET_FPU */
 struct kvm_fpu {
 	__u8 fpr[8][16];
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 22eb0dc..813ca37 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -12419,6 +12419,47 @@ static int vmx_smi_allowed(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+static int vmx_set_nested(struct kvm_vcpu *vcpu, struct kvm_nested *nested)
+{
+	struct nested_vmx *nvmx = &to_vmx(vcpu)->nested;
+
+	if (!nested->vmxon)
+		return 0;
+
+	nvmx->vmxon_ptr = nested->vmxon_ptr;
+	enter_vmx_operation(vcpu);
+	nvmx->current_vmptr = nested->current_vmptr;
+	nvmx->vmcs01_debugctl = nested->vmcs01_debugctl;
+
+	if (nested->current_vmptr != -1ull) {
+		if (kvm_read_guest(vcpu->kvm, nvmx->current_vmptr,
+				   nvmx->cached_vmcs12,
+				   sizeof(*nvmx->cached_vmcs12)))
+			return -EFAULT;
+
+		set_current_vmptr(to_vmx(vcpu), nested->current_vmptr);
+	}
+
+	return 0;
+}
+
+static int vmx_get_nested(struct kvm_vcpu *vcpu, struct kvm_nested *nested)
+{
+	struct nested_vmx *nvmx = &to_vmx(vcpu)->nested;
+
+	memset(nested, 0, sizeof(*nested));
+
+	if (!nvmx->vmxon)
+		return 0;
+
+	nested->vmxon = true;
+	nested->vmxon_ptr = nvmx->vmxon_ptr;
+	nested->current_vmptr = nvmx->current_vmptr;
+	nested->vmcs01_debugctl = nvmx->vmcs01_debugctl;
+
+	return 0;
+}
+
 static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -12512,6 +12553,9 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 
 	.tlb_flush = vmx_flush_tlb,
 
+	.set_nested = vmx_set_nested,
+	.get_nested = vmx_get_nested,
+
 	.run = vmx_vcpu_run,
 	.handle_exit = vmx_handle_exit,
 	.skip_emulated_instruction = skip_emulated_instruction,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8256a2d..de34a7a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7543,6 +7543,16 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	return r;
 }
 
+int kvm_arch_vcpu_ioctl_get_nested(struct kvm_vcpu *vcpu, struct kvm_nested *nested)
+{
+	return kvm_x86_ops->get_nested(vcpu, nested);
+}
+
+int kvm_arch_vcpu_ioctl_set_nested(struct kvm_vcpu *vcpu, struct kvm_nested *nested)
+{
+	return kvm_x86_ops->set_nested(vcpu, nested);
+}
+
 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
 	vcpu_load(vcpu);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index beb9d42..becc066 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -776,6 +776,8 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu);
 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
 				  struct kvm_translation *tr);
+int kvm_arch_vcpu_ioctl_get_nested(struct kvm_vcpu *vcpu, struct kvm_nested *nested);
+int kvm_arch_vcpu_ioctl_set_nested(struct kvm_vcpu *vcpu, struct kvm_nested *nested);
 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs);
 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs);
 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 59e5b7b..eace4c8 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1467,6 +1467,9 @@ struct kvm_sev_dbg {
 	__u32 len;
 };
 
+#define KVM_GET_NESTED		_IOR(KVMIO, 0xba, struct kvm_nested)
+#define KVM_SET_NESTED		_IOW(KVMIO, 0xbb, struct kvm_nested)
+
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
 #define KVM_DEV_ASSIGN_PCI_2_3		(1 << 1)
 #define KVM_DEV_ASSIGN_MASK_INTX	(1 << 2)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 38f35ae..539ccfb 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2661,6 +2661,30 @@ static long kvm_vcpu_ioctl(struct file *filp,
 		kfree(kvm_regs);
 		break;
 	}
+	case KVM_GET_NESTED: {
+		struct kvm_nested nested;
+
+		r = kvm_arch_vcpu_ioctl_get_nested(vcpu, &nested);
+		if (r)
+			break;
+
+		r = -EFAULT;
+		if (copy_to_user(argp, &nested, sizeof(nested)))
+			break;
+
+		r = 0;
+		break;
+	}
+	case KVM_SET_NESTED: {
+		struct kvm_nested nested;
+
+		r = -EFAULT;
+		if (copy_from_user(&nested, argp, sizeof(nested)))
+			break;
+
+		r = kvm_arch_vcpu_ioctl_set_nested(vcpu, &nested);
+		break;
+	}
 	case KVM_GET_SREGS: {
 		kvm_sregs = kzalloc(sizeof(struct kvm_sregs), GFP_KERNEL);
 		r = -ENOMEM;
-- 
2.7.4
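For reference (not part of the patch), here is a rough sketch of how userspace
could drive the new ioctls around a vCPU save/restore cycle. It assumes a vCPU
fd obtained via KVM_CREATE_VCPU and uapi headers that carry struct kvm_nested
and the KVM_{GET,SET}_NESTED definitions introduced above; the helper names are
mine, just for illustration:

/*
 * Hypothetical userspace helpers: save and restore the nested VMX state of a
 * vCPU.  Requires the patched <linux/kvm.h> for struct kvm_nested and the
 * KVM_GET_NESTED / KVM_SET_NESTED ioctl numbers.
 */
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int save_nested(int vcpu_fd, struct kvm_nested *nested)
{
	memset(nested, 0, sizeof(*nested));
	/* Fetch vmxon, vmxon_ptr, current_vmptr and vmcs01_debugctl. */
	if (ioctl(vcpu_fd, KVM_GET_NESTED, nested) < 0) {
		perror("KVM_GET_NESTED");
		return -1;
	}
	return 0;
}

static int restore_nested(int vcpu_fd, const struct kvm_nested *nested)
{
	/*
	 * Re-enters VMX operation and reloads the cached VMCS12; since the
	 * kernel reads the VMCS12 back from L1 guest memory, guest memory
	 * must already have been restored before this call.
	 */
	if (ioctl(vcpu_fd, KVM_SET_NESTED, nested) < 0) {
		perror("KVM_SET_NESTED");
		return -1;
	}
	return 0;
}

save_nested() would be called alongside KVM_GET_REGS/KVM_GET_SREGS when pausing
the VM, and restore_nested() alongside the corresponding SET calls on resume.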