A number of use cases have surfaced where it'd be beneficial to have a
vCPU stop its execution in user-space, as opposed to having it sleep
in-kernel, be it to make better use of the pCPU's time while the vCPU
is halted, or to implement security features like Hyper-V's VSM.

A problem with this approach is that user-space has no way of knowing
whether the vCPU has pending events (interrupts, timers, etc.), so we
need a new interface to query whether any are pending. poll() turned
out to be a very good fit, so enable polling on vCPU fds.

The poll() implementation considers a vCPU to have a pending event if
it hasn't entered the guest since it was last kicked by an event
source (a kick forces a guest exit). Kicking a vCPU that has pollers
wakes up the polling threads.

NOTES:
 - There is a race between the 'vcpu->kicked' check in the polling
   thread and the vCPU thread re-entering the guest. This hardly
   affects the use cases stated above, but needs to be fixed.
 - This was tested alongside a WIP Hyper-V Virtual Trust Level
   implementation which makes ample use of the poll() interface.

Signed-off-by: Nicolas Saenz Julienne <nsaenz@xxxxxxxxxx>
---
 arch/x86/kvm/x86.c       |  2 ++
 include/linux/kvm_host.h |  2 ++
 virt/kvm/kvm_main.c      | 30 ++++++++++++++++++++++++++++++
 3 files changed, 34 insertions(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 57f9c58e1e32..bf4891bc044e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10788,6 +10788,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			goto cancel_injection;
 	}
 
+	WRITE_ONCE(vcpu->kicked, false);
+
 	if (req_immediate_exit) {
 		kvm_make_request(KVM_REQ_EVENT, vcpu);
 		static_call(kvm_x86_request_immediate_exit)(vcpu);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 687589ce9f63..71e1e8cf8936 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -336,6 +336,7 @@ struct kvm_vcpu {
 #endif
 	int mode;
 	u64 requests;
+	bool kicked;
 	unsigned long guest_debug;
 
 	struct mutex mutex;
@@ -395,6 +396,7 @@ struct kvm_vcpu {
 	 */
 	struct kvm_memory_slot *last_used_slot;
 	u64 last_used_slot_gen;
+	wait_queue_head_t wqh;
 };
 
 /*
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index ad9aab898a0c..fde004a0ac46 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -497,12 +497,14 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 	kvm_vcpu_set_dy_eligible(vcpu, false);
 	vcpu->preempted = false;
 	vcpu->ready = false;
+	vcpu->kicked = false;
 	preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);
 	vcpu->last_used_slot = NULL;
 
 	/* Fill the stats id string for the vcpu */
 	snprintf(vcpu->stats_id, sizeof(vcpu->stats_id), "kvm-%d/vcpu-%d",
 		 task_pid_nr(current), id);
+	init_waitqueue_head(&vcpu->wqh);
 }
 
 static void kvm_vcpu_destroy(struct kvm_vcpu *vcpu)
@@ -3970,6 +3972,10 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
 		if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
 			smp_send_reschedule(cpu);
 	}
+
+	if (!cmpxchg(&vcpu->kicked, false, true))
+		wake_up_interruptible(&vcpu->wqh);
+
 out:
 	put_cpu();
 }
@@ -4174,6 +4180,29 @@ static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma)
 	return 0;
 }
 
+static __poll_t kvm_vcpu_poll(struct file *file, poll_table *wait)
+{
+	struct kvm_vcpu *vcpu = file->private_data;
+
+	poll_wait(file, &vcpu->wqh, wait);
+
+	/*
+	 * Make sure we read vcpu->kicked after adding the vcpu into
+	 * the waitqueue list. Otherwise we might have the following race:
+	 *
+	 *    READ_ONCE(vcpu->kicked)
+	 *                                 cmpxchg(&vcpu->kicked, false, true)
+	 *                                 wake_up_interruptible(&vcpu->wqh)
+	 *    list_add_tail(wait, &vcpu->wqh)
+	 */
+	smp_mb();
+	if (READ_ONCE(vcpu->kicked)) {
+		return EPOLLIN;
+	}
+
+	return 0;
+}
+
 static int kvm_vcpu_release(struct inode *inode, struct file *filp)
 {
 	struct kvm_vcpu *vcpu = filp->private_data;
@@ -4186,6 +4215,7 @@ static const struct file_operations kvm_vcpu_fops = {
 	.release = kvm_vcpu_release,
 	.unlocked_ioctl = kvm_vcpu_ioctl,
 	.mmap = kvm_vcpu_mmap,
+	.poll = kvm_vcpu_poll,
 	.llseek = noop_llseek,
 	KVM_COMPAT(kvm_vcpu_compat_ioctl),
 };
-- 
2.40.1
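P.S. For anyone wanting to play with this, below is a minimal sketch of
how a VMM thread might consume the interface. It is illustrative only,
not part of the patch: wait_for_vcpu_event() and vcpu_fd are made-up
names, the fd is assumed to come from the KVM_CREATE_VCPU ioctl, and
error handling is mostly elided.

#include <errno.h>
#include <poll.h>

/* Block until the vCPU has a pending event (hypothetical helper). */
static void wait_for_vcpu_event(int vcpu_fd)
{
	struct pollfd pfd = {
		.fd = vcpu_fd,
		.events = POLLIN,
	};
	int ret;

	/* poll() sleeps on the vCPU's waitqueue until kvm_vcpu_kick()
	 * wakes it; retry if interrupted by a signal. */
	do {
		ret = poll(&pfd, 1, -1);
	} while (ret < 0 && errno == EINTR);

	/* POLLIN means the vCPU was kicked and hasn't re-entered the
	 * guest since; user-space can now process the pending event. */
}

A finite timeout can be passed instead of -1 to bound the wait; per the
semantics above, POLLIN only says a kick happened since the last guest
entry, so the caller still has to figure out which event is pending.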