[RFC PATCH] KVM: race-free exit from KVM_RUN without POSIX signals

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The purpose of the KVM_SET_SIGNAL_MASK API is to let userspace "kick"
a VCPU out of KVM_RUN through a POSIX signal.  A signal is attached
to a dummy signal handler; by blocking the signal outside KVM_RUN and
unblocking it inside, this possible race is closed:

          VCPU thread                     service thread
   --------------------------------------------------------------
        check flag
                                          set flag
                                          raise signal
        (signal handler does nothing)
        KVM_RUN

However, one issue with KVM_SET_SIGNAL_MASK is that it has to take
tsk->sighand->siglock on every KVM_RUN.  This lock is often on a
remote NUMA node, because it is on the node of a thread's creator.
Taking this lock can be very expensive if there are many userspace
exits (as is the case for SMP Windows VMs without Hyper-V reference
time counter).

As an alternative, we can put the flag directly in kvm_run so that
KVM can see it:

          VCPU thread                     service thread
   --------------------------------------------------------------
                                          raise signal
        signal handler
          set run->immediate_exit
        KVM_RUN
          check run->immediate_exit

Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
---
 Documentation/virtual/kvm/api.txt | 13 ++++++++++++-
 include/uapi/linux/kvm.h          |  1 +
 virt/kvm/kvm_main.c               | 14 +++++++++++---
 3 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index e4f2cdcf78eb..925b1b6be073 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3389,7 +3389,18 @@ struct kvm_run {
 Request that KVM_RUN return when it becomes possible to inject external
 interrupts into the guest.  Useful in conjunction with KVM_INTERRUPT.
 
-	__u8 padding1[7];
+	__u8 immediate_exit;
+
+This field is polled once when KVM_RUN starts; if non-zero, KVM_RUN
+clears the field and exits immediately, returning -EINTR.  In the common
+scenario where a signal is used to "kick" a VCPU out of KVM_RUN, this
+field can be used as an alternative to KVM_SET_SIGNAL_MASK: instead of
+blocking the signal outside KVM_RUN, the signal handler should set
+run->immediate_exit to a nonzero value.
+
+This field is only available if KVM_CAP_IMMEDIATE_EXIT is.
+
+	__u8 padding1[6];
 
 	/* out */
 	__u32 exit_reason;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 7964b970b9ad..0aa43b1e5b1d 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -881,6 +881,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_SPAPR_RESIZE_HPT 133
 #define KVM_CAP_PPC_MMU_RADIX 134
 #define KVM_CAP_PPC_MMU_HASH_V3 135
+#define KVM_CAP_IMMEDIATE_EXIT 136
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 482612b4e496..5abb8df00db1 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2550,7 +2550,8 @@ static long kvm_vcpu_ioctl(struct file *filp,
 	if (r)
 		return r;
 	switch (ioctl) {
-	case KVM_RUN:
+	case KVM_RUN: {
+		struct kvm_run *run;
 		r = -EINVAL;
 		if (arg)
 			goto out;
@@ -2564,9 +2565,15 @@ static long kvm_vcpu_ioctl(struct file *filp,
 				synchronize_rcu();
 			put_pid(oldpid);
 		}
-		r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
-		trace_kvm_userspace_exit(vcpu->run->exit_reason, r);
+		run = vcpu->run;
+		if (run->immediate_exit) {
+			WRITE_ONCE(run->immediate_exit, 0);
+			return -EINTR;
+		}
+		r = kvm_arch_vcpu_ioctl_run(vcpu, run);
+		trace_kvm_userspace_exit(run->exit_reason, r);
 		break;
+	}
 	case KVM_GET_REGS: {
 		struct kvm_regs *kvm_regs;
 
@@ -2927,6 +2934,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
 #endif
 	case KVM_CAP_IOEVENTFD_ANY_LENGTH:
 	case KVM_CAP_CHECK_EXTENSION_VM:
+	case KVM_CAP_IMMEDIATE_EXIT:
 		return 1;
 #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
 	case KVM_CAP_IRQ_ROUTING:
-- 
1.8.3.1




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux