[PATCH 7/8] kvm: nVMX: Introduce KVM_CAP_VMX_STATE

With this capability, userspace gains two new vcpu ioctls,
KVM_GET_VMX_STATE and KVM_SET_VMX_STATE, which can be used to save
and restore the nested state of a vCPU that is in VMX operation.
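
For illustration, a minimal userspace sketch of the intended
save/restore flow might look like the following. This is commentary,
not part of the patch: vcpu_fd and the error handling are assumptions,
and the two-call sizing pattern follows the E2BIG contract documented
in the api.txt hunk below.

#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Save a vcpu's nested VMX state; returns a malloc'd buffer. */
static struct kvm_vmx_state *save_vmx_state(int vcpu_fd)
{
	struct kvm_vmx_state probe;
	struct kvm_vmx_state *state;

	/*
	 * Probe with data_size = 0.  When there is VMCS data to save,
	 * the ioctl fails with E2BIG after writing the required size
	 * back into probe.data_size.
	 */
	memset(&probe, 0, sizeof(probe));
	if (ioctl(vcpu_fd, KVM_GET_VMX_STATE, &probe) < 0 && errno != E2BIG)
		return NULL;

	state = calloc(1, sizeof(*state) + probe.data_size);
	if (!state)
		return NULL;
	state->data_size = probe.data_size;

	if (ioctl(vcpu_fd, KVM_GET_VMX_STATE, state) < 0) {
		free(state);
		return NULL;
	}
	return state;
}

/* Restore passes the same struct back to the destination vcpu. */
static int restore_vmx_state(int vcpu_fd, struct kvm_vmx_state *state)
{
	return ioctl(vcpu_fd, KVM_SET_VMX_STATE, state);
}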

Signed-off-by: Jim Mattson <jmattson@xxxxxxxxxx>
---
 Documentation/virtual/kvm/api.txt |  44 ++++++++++++
 arch/x86/include/asm/kvm_host.h   |   5 ++
 arch/x86/include/uapi/asm/kvm.h   |  12 ++++
 arch/x86/kvm/vmx.c                | 138 ++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c                |  19 ++++++
 include/uapi/linux/kvm.h          |   4 ++
 6 files changed, 222 insertions(+)
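
A usage note (commentary, not part of the patch): userspace should
probe for the capability before issuing the new ioctls; without the
callbacks wired up, both ioctls fail with EINVAL, as the x86.c hunk
below shows.  A sketch, assuming vm_fd is an open KVM VM descriptor:

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Returns nonzero when nested VMX state save/restore is available. */
static int have_vmx_state(int vm_fd)
{
	return ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_VMX_STATE) > 0;
}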

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 6bbceb9..8694eb9 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3198,6 +3198,50 @@ struct kvm_reinject_control {
 pit_reinject = 0 (!reinject mode) is recommended, unless running an old
 operating system that uses the PIT for timing (e.g. Linux 2.4.x).
 
+4.99 KVM_GET_VMX_STATE
+
+Capability: KVM_CAP_VMX_STATE
+Architectures: x86/vmx
+Type: vcpu ioctl
+Parameters: struct kvm_vmx_state (in/out)
+Returns: 0 on success, -1 on error
+Errors:
+  E2BIG:     the state to be saved exceeds the data_size specified by
+             the user; the required size is written back into data_size.
+
+The maximum data size is currently 8192 bytes.
+
+struct kvm_vmx_state {
+	__u64 vmxon_ptr;
+	__u64 current_vmptr;
+	__u32 flags;
+	__u32 data_size;
+	__u8 data[0];
+};
+
+This ioctl copies the vcpu's kvm_vmx_state struct from the kernel to
+userspace; the cached VMCS12, if any, is returned in the data array.
+
+
+4.100 KVM_SET_VMX_STATE
+
+Capability: KVM_CAP_VMX_STATE
+Architectures: x86/vmx
+Type: vcpu ioctl
+Parameters: struct kvm_vmx_state (in)
+Returns: 0 on success, -1 on error
+
+struct kvm_vmx_state {
+	__u64 vmxon_ptr;
+	__u64 current_vmptr;
+	__u32 flags;
+	__u32 data_size;
+	__u8 data[0];
+};
+
+This ioctl copies the vcpu's kvm_vmx_state struct from userspace to
+the kernel.
+
 5. The kvm_run structure
 ------------------------
 
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index bdde807..d6be6f1 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1020,6 +1020,11 @@ struct kvm_x86_ops {
 	void (*cancel_hv_timer)(struct kvm_vcpu *vcpu);
 
 	void (*setup_mce)(struct kvm_vcpu *vcpu);
+
+	int (*get_vmx_state)(struct kvm_vcpu *vcpu,
+			     struct kvm_vmx_state __user *user_vmx_state);
+	int (*set_vmx_state)(struct kvm_vcpu *vcpu,
+			     struct kvm_vmx_state __user *user_vmx_state);
 };
 
 struct kvm_arch_async_pf {
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 739c0c5..5aaf8bb 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -357,4 +357,16 @@ struct kvm_sync_regs {
 #define KVM_X86_QUIRK_LINT0_REENABLED	(1 << 0)
 #define KVM_X86_QUIRK_CD_NW_CLEARED	(1 << 1)
 
+#define KVM_VMX_STATE_GUEST_MODE	0x00000001
+#define KVM_VMX_STATE_RUN_PENDING	0x00000002
+
+/* for KVM_CAP_VMX_STATE */
+struct kvm_vmx_state {
+	__u64 vmxon_ptr;
+	__u64 current_vmptr;
+	__u32 flags;
+	__u32 data_size;
+	__u8 data[0];
+};
+
 #endif /* _ASM_X86_KVM_H */
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9f0c747..d75c183 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -11275,6 +11275,141 @@ static void vmx_setup_mce(struct kvm_vcpu *vcpu)
 			~FEATURE_CONTROL_LMCE;
 }
 
+static int get_vmcs_cache(struct kvm_vcpu *vcpu,
+			  struct kvm_vmx_state __user *user_vmx_state,
+			  struct kvm_vmx_state vmx_state)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+
+	/*
+	 * When running L2, the authoritative vmcs12 state is in the
+	 * vmcs02. When running L1, the authoritative vmcs12 state is
+	 * in the shadow vmcs linked to vmcs01, unless
+	 * sync_shadow_vmcs is set, in which case, the authoritative
+	 * vmcs12 state is in the vmcs12 already.
+	 */
+	if (is_guest_mode(vcpu))
+		sync_vmcs12(vcpu, vmcs12);
+	else if (enable_shadow_vmcs && !vmx->nested.sync_shadow_vmcs)
+		copy_shadow_to_vmcs12(vmx);
+	if (copy_to_user(user_vmx_state->data, vmcs12, VMCS12_SIZE))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int get_vmx_state(struct kvm_vcpu *vcpu,
+			 struct kvm_vmx_state __user *user_vmx_state)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct kvm_vmx_state vmx_state = {
+		.vmxon_ptr = -1ull,
+		.current_vmptr = -1ull,
+		.flags = 0,
+		.data_size = 0
+	};
+	u32 user_data_size;
+
+	if (copy_from_user(&user_data_size, &user_vmx_state->data_size,
+			   sizeof(user_data_size)))
+		return -EFAULT;
+
+	if (nested_vmx_allowed(vcpu) && vmx->nested.vmxon) {
+		vmx_state.vmxon_ptr = vmx->nested.vmxon_ptr;
+		vmx_state.current_vmptr = vmx->nested.current_vmptr;
+		if (vmx_state.current_vmptr != -1ull)
+			vmx_state.data_size += VMCS12_SIZE;
+		if (is_guest_mode(vcpu)) {
+			vmx_state.flags |= KVM_VMX_STATE_GUEST_MODE;
+			if (vmx->nested.nested_run_pending)
+				vmx_state.flags |= KVM_VMX_STATE_RUN_PENDING;
+		}
+	}
+
+	if (copy_to_user(user_vmx_state, &vmx_state, sizeof(vmx_state)))
+		return -EFAULT;
+
+	if (user_data_size < vmx_state.data_size)
+		return -E2BIG;
+
+	if (vmx_state.data_size > 0)
+		return get_vmcs_cache(vcpu, user_vmx_state, vmx_state);
+
+	return 0;
+}
+
+static bool page_address_valid(struct kvm_vcpu *vcpu, gpa_t gpa)
+{
+	return PAGE_ALIGNED(gpa) && !(gpa >> cpuid_maxphyaddr(vcpu));
+}
+
+static int set_vmcs_cache(struct kvm_vcpu *vcpu,
+			  struct kvm_vmx_state __user *user_vmx_state,
+			  struct kvm_vmx_state vmx_state)
+
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+	u32 exit_qual;
+
+	if (vmx_state.data_size < VMCS12_SIZE ||
+	    vmx_state.current_vmptr == vmx_state.vmxon_ptr ||
+	    !page_address_valid(vcpu, vmx_state.current_vmptr))
+		return -EINVAL;
+	if (copy_from_user(vmcs12, user_vmx_state->data, VMCS12_SIZE))
+		return -EFAULT;
+	if (vmcs12->revision_id != VMCS12_REVISION)
+		return -EINVAL;
+	set_current_vmptr(vmx, vmx_state.current_vmptr);
+	if (enable_shadow_vmcs)
+		vmx->nested.sync_shadow_vmcs = true;
+	if (!(vmx_state.flags & KVM_VMX_STATE_GUEST_MODE))
+		return 0;
+
+	if (check_vmentry_prereqs(vcpu, vmcs12) ||
+	    check_vmentry_postreqs(vcpu, vmcs12, &exit_qual))
+		return -EINVAL;
+	return enter_vmx_non_root_mode(vcpu);
+}
+
+static int set_vmx_state(struct kvm_vcpu *vcpu,
+			 struct kvm_vmx_state __user *user_vmx_state)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct kvm_vmx_state vmx_state;
+	int ret;
+
+	if (copy_from_user(&vmx_state, user_vmx_state, sizeof(vmx_state)))
+		return -EFAULT;
+
+	if (vmx_state.flags &
+	    ~(KVM_VMX_STATE_RUN_PENDING | KVM_VMX_STATE_GUEST_MODE))
+		return -EINVAL;
+
+	if (!nested_vmx_allowed(vcpu))
+		return vmx_state.vmxon_ptr == -1ull ? 0 : -EINVAL;
+
+	vmx_leave_nested(vcpu);
+
+	vmx->nested.nested_run_pending =
+		!!(vmx_state.flags & KVM_VMX_STATE_RUN_PENDING);
+	if (vmx_state.vmxon_ptr == -1ull)
+		return 0;
+
+	if (!page_address_valid(vcpu, vmx_state.vmxon_ptr))
+		return -EINVAL;
+	vmx->nested.vmxon_ptr = vmx_state.vmxon_ptr;
+	ret = enter_vmx_operation(vcpu);
+	if (ret)
+		return ret;
+
+	if (vmx_state.current_vmptr == -1ull)
+		return 0;
+
+	return set_vmcs_cache(vcpu, user_vmx_state, vmx_state);
+}
+
 static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.cpu_has_kvm_support = cpu_has_kvm_support,
 	.disabled_by_bios = vmx_disabled_by_bios,
@@ -11403,6 +11538,9 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 #endif
 
 	.setup_mce = vmx_setup_mce,
+
+	.get_vmx_state = get_vmx_state,
+	.set_vmx_state = set_vmx_state,
 };
 
 static int __init vmx_init(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 04c5d96..e249215 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2685,6 +2685,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_X2APIC_API:
 		r = KVM_X2APIC_API_VALID_FLAGS;
 		break;
+	case KVM_CAP_VMX_STATE:
+		r = !!kvm_x86_ops->get_vmx_state;
+		break;
 	default:
 		r = 0;
 		break;
@@ -3585,6 +3588,22 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
 		break;
 	}
+	case KVM_GET_VMX_STATE: {
+		struct kvm_vmx_state __user *user_vmx_state = argp;
+
+		r = -EINVAL;
+		if (kvm_x86_ops->get_vmx_state)
+			r = kvm_x86_ops->get_vmx_state(vcpu, user_vmx_state);
+		goto out;
+	}
+	case KVM_SET_VMX_STATE: {
+		struct kvm_vmx_state __user *user_vmx_state = argp;
+
+		r = -EINVAL;
+		if (kvm_x86_ops->set_vmx_state)
+			r = kvm_x86_ops->set_vmx_state(vcpu, user_vmx_state);
+		goto out;
+	}
 	default:
 		r = -EINVAL;
 	}
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 4ee67cb..ba3c586 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -870,6 +870,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_S390_USER_INSTR0 130
 #define KVM_CAP_MSI_DEVID 131
 #define KVM_CAP_PPC_HTM 132
+#define KVM_CAP_VMX_STATE 133
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1280,6 +1281,9 @@ struct kvm_s390_ucas_mapping {
 #define KVM_S390_GET_IRQ_STATE	  _IOW(KVMIO, 0xb6, struct kvm_s390_irq_state)
 /* Available with KVM_CAP_X86_SMM */
 #define KVM_SMI                   _IO(KVMIO,   0xb7)
+/* Available with KVM_CAP_VMX_STATE */
+#define KVM_GET_VMX_STATE         _IOWR(KVMIO, 0xb8, struct kvm_vmx_state)
+#define KVM_SET_VMX_STATE         _IOW(KVMIO,  0xb9, struct kvm_vmx_state)
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
 #define KVM_DEV_ASSIGN_PCI_2_3		(1 << 1)
-- 
2.8.0.rc3.226.g39d4020
