On 22/04/20 06:07, Davidlohr Bueso wrote: > The use of any sort of waitqueue (simple or regular) for > wait/waking vcpus has always been an overkill and semantically > wrong. Because this is per-vcpu (which is blocked) there is > only ever a single waiting vcpu, thus no need for any sort of > queue. > > As such, make use of the rcuwait primitive, with the following > considerations: > > - rcuwait already provides the proper barriers that serialize > concurrent waiter and waker. > > - Task wakeup is done in rcu read critical region, with a > stable task pointer. > > - Because there is no concurrency among waiters, we need > not worry about rcuwait_wait_event() calls corrupting > the wait->task. As a consequence, this saves the locking > done in swait when modifying the queue. This also applies > to per-vcore wait for powerpc kvm-hv. > > The x86 tscdeadline_latency test mentioned in 8577370fb0cb > ("KVM: Use simple waitqueue for vcpu->wq") shows that, on avg, > latency is reduced by around 15-20% with this change. 
> > Cc: Paul Mackerras <paulus@xxxxxxxxxx> > Cc: kvmarm@xxxxxxxxxxxxxxxxxxxxx > Cc: linux-mips@xxxxxxxxxxxxxxx > Signed-off-by: Davidlohr Bueso <dbueso@xxxxxxx> Reviewed-by: Paolo Bonzini <pbonzini@xxxxxxxxxx> > --- > arch/mips/kvm/mips.c | 6 ++---- > arch/powerpc/include/asm/kvm_book3s.h | 2 +- > arch/powerpc/include/asm/kvm_host.h | 2 +- > arch/powerpc/kvm/book3s_hv.c | 22 ++++++++-------------- > arch/powerpc/kvm/powerpc.c | 2 +- > arch/x86/kvm/lapic.c | 2 +- > include/linux/kvm_host.h | 10 +++++----- > virt/kvm/arm/arch_timer.c | 2 +- > virt/kvm/arm/arm.c | 9 +++++---- > virt/kvm/async_pf.c | 3 +-- > virt/kvm/kvm_main.c | 19 +++++++++---------- > 11 files changed, 35 insertions(+), 44 deletions(-) > > diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c > index 8f05dd0a0f4e..fad6acce46e4 100644 > --- a/arch/mips/kvm/mips.c > +++ b/arch/mips/kvm/mips.c > @@ -284,8 +284,7 @@ static enum hrtimer_restart kvm_mips_comparecount_wakeup(struct hrtimer *timer) > kvm_mips_callbacks->queue_timer_int(vcpu); > > vcpu->arch.wait = 0; > - if (swq_has_sleeper(&vcpu->wq)) > - swake_up_one(&vcpu->wq); > + rcuwait_wake_up(&vcpu->wait); > > return kvm_mips_count_timeout(vcpu); > } > @@ -511,8 +510,7 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, > > dvcpu->arch.wait = 0; > > - if (swq_has_sleeper(&dvcpu->wq)) > - swake_up_one(&dvcpu->wq); > + rcuwait_wake_up(&dvcpu->wait); > > return 0; > } > diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h > index 506e4df2d730..6e5d85ba588d 100644 > --- a/arch/powerpc/include/asm/kvm_book3s.h > +++ b/arch/powerpc/include/asm/kvm_book3s.h > @@ -78,7 +78,7 @@ struct kvmppc_vcore { > struct kvm_vcpu *runnable_threads[MAX_SMT_THREADS]; > struct list_head preempt_list; > spinlock_t lock; > - struct swait_queue_head wq; > + struct rcuwait wait; > spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */ > u64 stolen_tb; > u64 preempt_tb; > diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h > index 1dc63101ffe1..337047ba4a56 100644 > --- a/arch/powerpc/include/asm/kvm_host.h > +++ b/arch/powerpc/include/asm/kvm_host.h > @@ -751,7 +751,7 @@ struct kvm_vcpu_arch { > u8 irq_pending; /* Used by XIVE to signal pending guest irqs */ > u32 last_inst; > > - struct swait_queue_head *wqp; > + struct rcuwait *waitp; > struct kvmppc_vcore *vcore; > int ret; > int trap; > diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c > index 93493f0cbfe8..b8d42f523ca7 100644 > --- a/arch/powerpc/kvm/book3s_hv.c > +++ b/arch/powerpc/kvm/book3s_hv.c > @@ -230,13 +230,11 @@ static bool kvmppc_ipi_thread(int cpu) > static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu) > { > int cpu; > - struct swait_queue_head *wqp; > + struct rcuwait *wait; > > - wqp = kvm_arch_vcpu_wq(vcpu); > - if (swq_has_sleeper(wqp)) { > - swake_up_one(wqp); > + wait = kvm_arch_vcpu_get_wait(vcpu); > + if (rcuwait_wake_up(wait)) > ++vcpu->stat.halt_wakeup; > - } > > cpu = READ_ONCE(vcpu->arch.thread_cpu); > if (cpu >= 0 && kvmppc_ipi_thread(cpu)) > @@ -2125,7 +2123,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int id) > > spin_lock_init(&vcore->lock); > spin_lock_init(&vcore->stoltb_lock); > - init_swait_queue_head(&vcore->wq); > + rcuwait_init(&vcore->wait); > vcore->preempt_tb = TB_NIL; > vcore->lpcr = kvm->arch.lpcr; > vcore->first_vcpuid = id; > @@ -3784,7 +3782,6 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) > ktime_t cur, start_poll, start_wait; > int do_sleep = 1; > u64 block_ns; > - DECLARE_SWAITQUEUE(wait); > > /* Poll for pending exceptions and ceded state */ > cur = start_poll = ktime_get(); > @@ -3812,10 +3809,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) > } > } > > - prepare_to_swait_exclusive(&vc->wq, &wait, TASK_INTERRUPTIBLE); > - > if (kvmppc_vcore_check_block(vc)) { > - finish_swait(&vc->wq, &wait); > do_sleep = 0; > /* If we polled, count this as a successful poll */ 
> if (vc->halt_poll_ns) > @@ -3828,8 +3822,8 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) > vc->vcore_state = VCORE_SLEEPING; > trace_kvmppc_vcore_blocked(vc, 0); > spin_unlock(&vc->lock); > - schedule(); > - finish_swait(&vc->wq, &wait); > + rcuwait_wait_event(&vc->wait, > + kvmppc_vcore_check_block(vc), TASK_INTERRUPTIBLE); > spin_lock(&vc->lock); > vc->vcore_state = VCORE_INACTIVE; > trace_kvmppc_vcore_blocked(vc, 1); > @@ -3940,7 +3934,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) > kvmppc_start_thread(vcpu, vc); > trace_kvm_guest_enter(vcpu); > } else if (vc->vcore_state == VCORE_SLEEPING) { > - swake_up_one(&vc->wq); > + rcuwait_wake_up(&vc->wait); > } > > } > @@ -4279,7 +4273,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) > } > user_vrsave = mfspr(SPRN_VRSAVE); > > - vcpu->arch.wqp = &vcpu->arch.vcore->wq; > + vcpu->arch.waitp = &vcpu->arch.vcore->wait; > vcpu->arch.pgdir = kvm->mm->pgd; > vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; > > diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c > index e15166b0a16d..4a074b587520 100644 > --- a/arch/powerpc/kvm/powerpc.c > +++ b/arch/powerpc/kvm/powerpc.c > @@ -751,7 +751,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) > if (err) > goto out_vcpu_uninit; > > - vcpu->arch.wqp = &vcpu->wq; > + vcpu->arch.waitp = &vcpu->wait; > kvmppc_create_vcpu_debugfs(vcpu, vcpu->vcpu_id); > return 0; > > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c > index 9af25c97612a..fb2f56ba171d 100644 > --- a/arch/x86/kvm/lapic.c > +++ b/arch/x86/kvm/lapic.c > @@ -1833,7 +1833,7 @@ void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu) > /* If the preempt notifier has already run, it also called apic_timer_expired */ > if (!apic->lapic_timer.hv_timer_in_use) > goto out; > - WARN_ON(swait_active(&vcpu->wq)); > + WARN_ON(rcu_dereference(vcpu->wait.task)); > cancel_hv_timer(apic); > apic_timer_expired(apic); > > diff --git 
a/include/linux/kvm_host.h b/include/linux/kvm_host.h > index 6d58beb65454..fc34021546bd 100644 > --- a/include/linux/kvm_host.h > +++ b/include/linux/kvm_host.h > @@ -23,7 +23,7 @@ > #include <linux/irqflags.h> > #include <linux/context_tracking.h> > #include <linux/irqbypass.h> > -#include <linux/swait.h> > +#include <linux/rcuwait.h> > #include <linux/refcount.h> > #include <linux/nospec.h> > #include <asm/signal.h> > @@ -277,7 +277,7 @@ struct kvm_vcpu { > struct mutex mutex; > struct kvm_run *run; > > - struct swait_queue_head wq; > + struct rcuwait wait; > struct pid __rcu *pid; > int sigset_active; > sigset_t sigset; > @@ -956,12 +956,12 @@ static inline bool kvm_arch_has_assigned_device(struct kvm *kvm) > } > #endif > > -static inline struct swait_queue_head *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu) > +static inline struct rcuwait *kvm_arch_vcpu_get_wait(struct kvm_vcpu *vcpu) > { > #ifdef __KVM_HAVE_ARCH_WQP > - return vcpu->arch.wqp; > + return vcpu->arch.waitp; > #else > - return &vcpu->wq; > + return &vcpu->wait; > #endif > } > > diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c > index 93bd59b46848..b2805105bbe5 100644 > --- a/virt/kvm/arm/arch_timer.c > +++ b/virt/kvm/arm/arch_timer.c > @@ -593,7 +593,7 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) > if (map.emul_ptimer) > soft_timer_cancel(&map.emul_ptimer->hrtimer); > > - if (swait_active(kvm_arch_vcpu_wq(vcpu))) > + if (rcu_dereference(kvm_arch_vcpu_get_wait(vcpu)->task) != NULL) > kvm_timer_blocking(vcpu); > > /* > diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c > index 48d0ec44ad77..f94a10bb1251 100644 > --- a/virt/kvm/arm/arm.c > +++ b/virt/kvm/arm/arm.c > @@ -579,16 +579,17 @@ void kvm_arm_resume_guest(struct kvm *kvm) > > kvm_for_each_vcpu(i, vcpu, kvm) { > vcpu->arch.pause = false; > - swake_up_one(kvm_arch_vcpu_wq(vcpu)); > + rcuwait_wake_up(kvm_arch_vcpu_get_wait(vcpu)); > } > } > > static void vcpu_req_sleep(struct kvm_vcpu *vcpu) > { > - struct swait_queue_head *wq = 
kvm_arch_vcpu_wq(vcpu); > + struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu); > > - swait_event_interruptible_exclusive(*wq, ((!vcpu->arch.power_off) && > - (!vcpu->arch.pause))); > + rcuwait_wait_event(wait, > + (!vcpu->arch.power_off) && (!vcpu->arch.pause), > + TASK_INTERRUPTIBLE); > > if (vcpu->arch.power_off || vcpu->arch.pause) { > /* Awaken to handle a signal, request we sleep again later. */ > diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c > index 15e5b037f92d..10b533f641a6 100644 > --- a/virt/kvm/async_pf.c > +++ b/virt/kvm/async_pf.c > @@ -80,8 +80,7 @@ static void async_pf_execute(struct work_struct *work) > > trace_kvm_async_pf_completed(addr, cr2_or_gpa); > > - if (swq_has_sleeper(&vcpu->wq)) > - swake_up_one(&vcpu->wq); > + rcuwait_wake_up(&vcpu->wait); > > mmput(mm); > kvm_put_kvm(vcpu->kvm); > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c > index 74bdb7bf3295..835fb109badf 100644 > --- a/virt/kvm/kvm_main.c > +++ b/virt/kvm/kvm_main.c > @@ -341,7 +341,7 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) > vcpu->kvm = kvm; > vcpu->vcpu_id = id; > vcpu->pid = NULL; > - init_swait_queue_head(&vcpu->wq); > + rcuwait_init(&vcpu->wait); > kvm_async_pf_vcpu_init(vcpu); > > vcpu->pre_pcpu = -1; > @@ -2671,7 +2671,6 @@ static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu) > void kvm_vcpu_block(struct kvm_vcpu *vcpu) > { > ktime_t start, cur; > - DECLARE_SWAITQUEUE(wait); > bool waited = false; > u64 block_ns; > > @@ -2697,8 +2696,9 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) > } while (single_task_running() && ktime_before(cur, stop)); > } > > + prepare_to_rcuwait(&vcpu->wait); > for (;;) { > - prepare_to_swait_exclusive(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); > + set_current_state(TASK_INTERRUPTIBLE); > > if (kvm_vcpu_check_block(vcpu) < 0) > break; > @@ -2706,8 +2706,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) > waited = true; > schedule(); > } > - 
finish_rcuwait(&vcpu->wait); > cur = ktime_get(); > out: > kvm_arch_vcpu_unblocking(vcpu); > @@ -2738,11 +2737,10 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_block); > > bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu) > { > - struct swait_queue_head *wqp; > + struct rcuwait *wait; > > - wqp = kvm_arch_vcpu_wq(vcpu); > - if (swq_has_sleeper(wqp)) { > - swake_up_one(wqp); > + wait = kvm_arch_vcpu_get_wait(vcpu); > + if (rcuwait_wake_up(wait)) { > WRITE_ONCE(vcpu->ready, true); > ++vcpu->stat.halt_wakeup; > return true; > @@ -2884,7 +2882,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode) > continue; > if (vcpu == me) > continue; > - if (swait_active(&vcpu->wq) && !vcpu_dy_runnable(vcpu)) > + if (rcu_dereference(vcpu->wait.task) && > + !vcpu_dy_runnable(vcpu)) > continue; > if (READ_ONCE(vcpu->preempted) && yield_to_kernel_mode && > !kvm_arch_vcpu_in_kernel(vcpu)) >