Re: [PATCH v2 2/2] KVM: selftests: Add logic to detect if ioctl() failed because VM was killed

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 11/8/2023 9:09 AM, Sean Christopherson wrote:
Add yet another macro to the VM/vCPU ioctl() framework to detect when an
ioctl() failed because KVM killed/bugged the VM, i.e. when there was
nothing wrong with the ioctl() itself.  If KVM kills a VM, e.g. by way of
a failed KVM_BUG_ON(), all subsequent VM and vCPU ioctl()s will fail with
-EIO, which can be quite misleading and ultimately waste user/developer
time.

Use KVM_CHECK_EXTENSION on KVM_CAP_USER_MEMORY to detect if the VM is
dead and/or bugged, as KVM doesn't provide a dedicated ioctl().  Using a
heuristic is obviously less than ideal, but practically speaking the logic
is bulletproof barring a KVM change, and any such change would arguably
break userspace, e.g. if KVM returns something other than -EIO.

We hit a similar issue when testing TDX VMs. Most SEAMCALL failures are handled with a KVM_BUG_ON(), which leaves the VM dead. Any subsequent ioctl() from userspace (QEMU) then gets -EIO.

Can we return a new KVM_EXIT_VM_DEAD on KVM_REQ_VM_DEAD, and replace -EIO with 0? Yes, it's an ABI change, but I'm wondering whether any userspace relies on the -EIO behavior for the VM-dead case.

Without the detection, tearing down a bugged VM yields a cryptic failure
when deleting memslots:

   ==== Test Assertion Failure ====
   lib/kvm_util.c:689: !ret
   pid=45131 tid=45131 errno=5 - Input/output error
      1	0x00000000004036c3: __vm_mem_region_delete at kvm_util.c:689
      2	0x00000000004042f0: kvm_vm_free at kvm_util.c:724 (discriminator 12)
      3	0x0000000000402929: race_sync_regs at sync_regs_test.c:193
      4	0x0000000000401cab: main at sync_regs_test.c:334 (discriminator 6)
      5	0x0000000000416f13: __libc_start_call_main at libc-start.o:?
      6	0x000000000041855f: __libc_start_main_impl at ??:?
      7	0x0000000000401d40: _start at ??:?
   KVM_SET_USER_MEMORY_REGION failed, rc: -1 errno: 5 (Input/output error)

Which morphs into a more pointed error message with the detection:

   ==== Test Assertion Failure ====
   lib/kvm_util.c:689: false
   pid=80347 tid=80347 errno=5 - Input/output error
      1	0x00000000004039ab: __vm_mem_region_delete at kvm_util.c:689 (discriminator 5)
      2	0x0000000000404660: kvm_vm_free at kvm_util.c:724 (discriminator 12)
      3	0x0000000000402ac9: race_sync_regs at sync_regs_test.c:193
      4	0x0000000000401cb7: main at sync_regs_test.c:334 (discriminator 6)
      5	0x0000000000418263: __libc_start_call_main at libc-start.o:?
      6	0x00000000004198af: __libc_start_main_impl at ??:?
      7	0x0000000000401d90: _start at ??:?
   KVM killed/bugged the VM, check the kernel log for clues

Suggested-by: Michal Luczaj <mhal@xxxxxxx>
Cc: Oliver Upton <oliver.upton@xxxxxxxxx>
Cc: Colton Lewis <coltonlewis@xxxxxxxxxx>
Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
---
  .../selftests/kvm/include/kvm_util_base.h     | 39 ++++++++++++++++---
  tools/testing/selftests/kvm/lib/kvm_util.c    |  2 +-
  2 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
index 1f6193dc7d3a..c7717942ddbb 100644
--- a/tools/testing/selftests/kvm/include/kvm_util_base.h
+++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
@@ -282,11 +282,40 @@ static __always_inline void static_assert_is_vm(struct kvm_vm *vm) { }
  	kvm_do_ioctl((vm)->fd, cmd, arg);			\
  })
+/*
+ * Assert that a VM or vCPU ioctl() succeeded, with extra magic to detect if
+ * the ioctl() failed because KVM killed/bugged the VM.  To detect a dead VM,
+ * probe KVM_CAP_USER_MEMORY, which (a) has been supported by KVM since before
+ * selftests existed and (b) should never outright fail, i.e. is supposed to
+ * return 0 or 1.  If KVM kills a VM, KVM returns -EIO for all ioctl()s for the
+ * VM and its vCPUs, including KVM_CHECK_EXTENSION.
+ */
+#define __TEST_ASSERT_VM_VCPU_IOCTL(cond, name, ret, vm)				\
+do {											\
+	int __errno = errno;								\
+											\
+	static_assert_is_vm(vm);							\
+											\
+	if (cond)									\
+		break;									\
+											\
+	if (errno == EIO &&								\
+	    __vm_ioctl(vm, KVM_CHECK_EXTENSION, (void *)KVM_CAP_USER_MEMORY) < 0) {	\
+		TEST_ASSERT(errno == EIO, "KVM killed the VM, should return -EIO");	\
+		TEST_FAIL("KVM killed/bugged the VM, check the kernel log for clues");	\
+	}										\
+	errno = __errno;								\
+	TEST_ASSERT(cond, __KVM_IOCTL_ERROR(name, ret));				\
+} while (0)
+
+#define TEST_ASSERT_VM_VCPU_IOCTL(cond, cmd, ret, vm)		\
+	__TEST_ASSERT_VM_VCPU_IOCTL(cond, #cmd, ret, vm)
+
  #define vm_ioctl(vm, cmd, arg)					\
  ({								\
  	int ret = __vm_ioctl(vm, cmd, arg);			\
  								\
-	TEST_ASSERT(!ret, __KVM_IOCTL_ERROR(#cmd, ret));	\
+	__TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, vm);		\
  })
static __always_inline void static_assert_is_vcpu(struct kvm_vcpu *vcpu) { }
@@ -301,7 +330,7 @@ static __always_inline void static_assert_is_vcpu(struct kvm_vcpu *vcpu) { }
  ({								\
  	int ret = __vcpu_ioctl(vcpu, cmd, arg);			\
  								\
-	TEST_ASSERT(!ret, __KVM_IOCTL_ERROR(#cmd, ret));	\
+	__TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, (vcpu)->vm);	\
  })
/*
@@ -312,7 +341,7 @@ static inline int vm_check_cap(struct kvm_vm *vm, long cap)
  {
  	int ret =  __vm_ioctl(vm, KVM_CHECK_EXTENSION, (void *)cap);
- TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_CHECK_EXTENSION, ret));
+	TEST_ASSERT_VM_VCPU_IOCTL(ret >= 0, KVM_CHECK_EXTENSION, ret, vm);
  	return ret;
  }
@@ -371,7 +400,7 @@ static inline int vm_get_stats_fd(struct kvm_vm *vm)
  {
  	int fd = __vm_ioctl(vm, KVM_GET_STATS_FD, NULL);
- TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_GET_STATS_FD, fd));
+	TEST_ASSERT_VM_VCPU_IOCTL(fd >= 0, KVM_GET_STATS_FD, fd, vm);
  	return fd;
  }
@@ -583,7 +612,7 @@ static inline int vcpu_get_stats_fd(struct kvm_vcpu *vcpu)
  {
  	int fd = __vcpu_ioctl(vcpu, KVM_GET_STATS_FD, NULL);
- TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_GET_STATS_FD, fd));
+	TEST_ASSERT_VM_VCPU_IOCTL(fd >= 0, KVM_CHECK_EXTENSION, fd, vcpu->vm);
  	return fd;
  }
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 7a8af1821f5d..c847f942cd38 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -1227,7 +1227,7 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
  	vcpu->vm = vm;
  	vcpu->id = vcpu_id;
  	vcpu->fd = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)(unsigned long)vcpu_id);
-	TEST_ASSERT(vcpu->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, vcpu->fd));
+	TEST_ASSERT_VM_VCPU_IOCTL(vcpu->fd >= 0, KVM_CREATE_VCPU, vcpu->fd, vm);
TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->run), "vcpu mmap size "
  		"smaller than expected, vcpu_mmap_sz: %i expected_min: %zi",





[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux