With this capability, there are two new vcpu ioctls: KVM_GET_VMX_STATE and KVM_SET_VMX_STATE. These can be used for saving and restoring a VM that is in VMX operation. Signed-off-by: Jim Mattson <jmattson@xxxxxxxxxx> --- Documentation/virtual/kvm/api.txt | 44 ++++++++++++ arch/x86/include/asm/kvm_host.h | 5 ++ arch/x86/include/uapi/asm/kvm.h | 12 ++++ arch/x86/kvm/vmx.c | 138 ++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.c | 19 ++++++ include/uapi/linux/kvm.h | 4 ++ 6 files changed, 222 insertions(+) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 6bbceb9..8694eb9 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -3198,6 +3198,50 @@ struct kvm_reinject_control { pit_reinject = 0 (!reinject mode) is recommended, unless running an old operating system that uses the PIT for timing (e.g. Linux 2.4.x). +4.99 KVM_GET_VMX_STATE + +Capability: KVM_CAP_VMX_STATE +Architectures: x86/vmx +Type: vcpu ioctl +Parameters: struct kvm_vmx_state (in/out) +Returns: 0 on success, -1 on error +Errors: + E2BIG: the data size exceeds the value of data_size specified by + the user (the size required will be written into data_size). + +The maximum data size is currently 8192. + +struct kvm_vmx_state { + __u64 vmxon_ptr; + __u64 current_vmptr; + __u32 flags; + __u32 data_size; + __u8 data[0]; +}; + +This ioctl copies the vcpu's kvm_vmx_state struct from the kernel to +userspace. + + +4.100 KVM_SET_VMX_STATE + +Capability: KVM_CAP_VMX_STATE +Architectures: x86/vmx +Type: vcpu ioctl +Parameters: struct kvm_vmx_state (in) +Returns: 0 on success, -1 on error + +struct kvm_vmx_state { + __u64 vmxon_ptr; + __u64 current_vmptr; + __u32 flags; + __u32 data_size; + __u8 data[0]; +}; + +This ioctl copies the vcpu's kvm_vmx_state struct from userspace to the +kernel. + 5. 
The kvm_run structure ------------------------ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index bdde807..d6be6f1 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1020,6 +1020,11 @@ struct kvm_x86_ops { void (*cancel_hv_timer)(struct kvm_vcpu *vcpu); void (*setup_mce)(struct kvm_vcpu *vcpu); + + int (*get_vmx_state)(struct kvm_vcpu *vcpu, + struct kvm_vmx_state __user *user_vmx_state); + int (*set_vmx_state)(struct kvm_vcpu *vcpu, + struct kvm_vmx_state __user *user_vmx_state); }; struct kvm_arch_async_pf { diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 739c0c5..5aaf8bb 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -357,4 +357,16 @@ struct kvm_sync_regs { #define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) #define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1) +#define KVM_VMX_STATE_GUEST_MODE 0x00000001 +#define KVM_VMX_STATE_RUN_PENDING 0x00000002 + +/* for KVM_CAP_VMX_STATE */ +struct kvm_vmx_state { + __u64 vmxon_ptr; + __u64 current_vmptr; + __u32 flags; + __u32 data_size; + __u8 data[0]; +}; + #endif /* _ASM_X86_KVM_H */ diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 9f0c747..d75c183 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -11275,6 +11275,141 @@ static void vmx_setup_mce(struct kvm_vcpu *vcpu) ~FEATURE_CONTROL_LMCE; } +static int get_vmcs_cache(struct kvm_vcpu *vcpu, + struct kvm_vmx_state __user *user_vmx_state, + struct kvm_vmx_state vmx_state) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + + /* + * When running L2, the authoritative vmcs12 state is in the + * vmcs02. When running L1, the authoritative vmcs12 state is + * in the shadow vmcs linked to vmcs01, unless + * sync_shadow_vmcs is set, in which case, the authoritative + * vmcs12 state is in the vmcs12 already. 
+ */ + if (is_guest_mode(vcpu)) + sync_vmcs12(vcpu, vmcs12); + else if (enable_shadow_vmcs && !vmx->nested.sync_shadow_vmcs) + copy_shadow_to_vmcs12(vmx); + if (copy_to_user(user_vmx_state->data, vmcs12, VMCS12_SIZE)) + return -EFAULT; + + return 0; +} + +static int get_vmx_state(struct kvm_vcpu *vcpu, + struct kvm_vmx_state __user *user_vmx_state) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct kvm_vmx_state vmx_state = { + .vmxon_ptr = -1ull, + .current_vmptr = -1ull, + .flags = 0, + .data_size = 0 + }; + u32 user_data_size; + + if (copy_from_user(&user_data_size, &user_vmx_state->data_size, + sizeof(user_data_size))) + return -EFAULT; + + if (nested_vmx_allowed(vcpu) && vmx->nested.vmxon) { + vmx_state.vmxon_ptr = vmx->nested.vmxon_ptr; + vmx_state.current_vmptr = vmx->nested.current_vmptr; + if (vmx_state.current_vmptr != -1ull) + vmx_state.data_size += VMCS12_SIZE; + if (is_guest_mode(vcpu)) { + vmx_state.flags |= KVM_VMX_STATE_GUEST_MODE; + if (vmx->nested.nested_run_pending) + vmx_state.flags |= KVM_VMX_STATE_RUN_PENDING; + } + } + + if (copy_to_user(user_vmx_state, &vmx_state, sizeof(vmx_state))) + return -EFAULT; + + if (user_data_size < vmx_state.data_size) + return -E2BIG; + + if (vmx_state.data_size > 0) + return get_vmcs_cache(vcpu, user_vmx_state, vmx_state); + + return 0; +} + +static bool page_address_valid(struct kvm_vcpu *vcpu, gpa_t gpa) +{ + return PAGE_ALIGNED(gpa) && !(gpa >> cpuid_maxphyaddr(vcpu)); +} + +static int set_vmcs_cache(struct kvm_vcpu *vcpu, + struct kvm_vmx_state __user *user_vmx_state, + struct kvm_vmx_state vmx_state) + +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + u32 exit_qual; + + if (vmx_state.data_size < VMCS12_SIZE || + vmx_state.current_vmptr == vmx_state.vmxon_ptr || + !page_address_valid(vcpu, vmx_state.current_vmptr)) + return -EINVAL; + if (copy_from_user(vmcs12, user_vmx_state->data, VMCS12_SIZE)) + return -EFAULT; + if (vmcs12->revision_id != VMCS12_REVISION) + return 
-EINVAL; + set_current_vmptr(vmx, vmx_state.current_vmptr); + if (enable_shadow_vmcs) + vmx->nested.sync_shadow_vmcs = true; + if (!(vmx_state.flags & KVM_VMX_STATE_GUEST_MODE)) + return 0; + + if (check_vmentry_prereqs(vcpu, vmcs12) || + check_vmentry_postreqs(vcpu, vmcs12, &exit_qual)) + return -EINVAL; + return enter_vmx_non_root_mode(vcpu); +} + +static int set_vmx_state(struct kvm_vcpu *vcpu, + struct kvm_vmx_state __user *user_vmx_state) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct kvm_vmx_state vmx_state; + int ret; + + if (copy_from_user(&vmx_state, user_vmx_state, sizeof(vmx_state))) + return -EFAULT; + + if (vmx_state.flags & + ~(KVM_VMX_STATE_RUN_PENDING | KVM_VMX_STATE_GUEST_MODE)) + return -EINVAL; + + if (!nested_vmx_allowed(vcpu)) + return vmx_state.vmxon_ptr == -1ull ? 0 : -EINVAL; + + vmx_leave_nested(vcpu); + + vmx->nested.nested_run_pending = + !!(vmx_state.flags & KVM_VMX_STATE_RUN_PENDING); + if (vmx_state.vmxon_ptr == -1ull) + return 0; + + if (!page_address_valid(vcpu, vmx_state.vmxon_ptr)) + return -EINVAL; + vmx->nested.vmxon_ptr = vmx_state.vmxon_ptr; + ret = enter_vmx_operation(vcpu); + if (ret) + return ret; + + if (vmx_state.current_vmptr == -1ull) + return 0; + + return set_vmcs_cache(vcpu, user_vmx_state, vmx_state); +} + static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .cpu_has_kvm_support = cpu_has_kvm_support, .disabled_by_bios = vmx_disabled_by_bios, @@ -11403,6 +11538,9 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { #endif .setup_mce = vmx_setup_mce, + + .get_vmx_state = get_vmx_state, + .set_vmx_state = set_vmx_state, }; static int __init vmx_init(void) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 04c5d96..e249215 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2685,6 +2685,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_X2APIC_API: r = KVM_X2APIC_API_VALID_FLAGS; break; + case KVM_CAP_VMX_STATE: + r = !!kvm_x86_ops->get_vmx_state; + break; 
default: r = 0; break; @@ -3585,6 +3588,22 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); break; } + case KVM_GET_VMX_STATE: { + struct kvm_vmx_state __user *user_vmx_state = argp; + + r = -EINVAL; + if (kvm_x86_ops->get_vmx_state) + r = kvm_x86_ops->get_vmx_state(vcpu, user_vmx_state); + goto out; + } + case KVM_SET_VMX_STATE: { + struct kvm_vmx_state __user *user_vmx_state = argp; + + r = -EINVAL; + if (kvm_x86_ops->set_vmx_state) + r = kvm_x86_ops->set_vmx_state(vcpu, user_vmx_state); + goto out; + } default: r = -EINVAL; } diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 4ee67cb..ba3c586 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -870,6 +870,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_S390_USER_INSTR0 130 #define KVM_CAP_MSI_DEVID 131 #define KVM_CAP_PPC_HTM 132 +#define KVM_CAP_VMX_STATE 133 #ifdef KVM_CAP_IRQ_ROUTING @@ -1280,6 +1281,9 @@ struct kvm_s390_ucas_mapping { #define KVM_S390_GET_IRQ_STATE _IOW(KVMIO, 0xb6, struct kvm_s390_irq_state) /* Available with KVM_CAP_X86_SMM */ #define KVM_SMI _IO(KVMIO, 0xb7) +/* Available with KVM_CAP_VMX_STATE */ +#define KVM_GET_VMX_STATE _IOWR(KVMIO, 0xb8, struct kvm_vmx_state) +#define KVM_SET_VMX_STATE _IOW(KVMIO, 0xb9, struct kvm_vmx_state) #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) -- 2.8.0.rc3.226.g39d4020 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html