On Fri, Aug 2, 2024 at 1:01 PM Sean Christopherson <seanjc@xxxxxxxxxx> wrote: > > To avoid jitter on KVM_RUN due to synchronize_rcu(), use a rwlock instead > of RCU to protect vcpu->pid, a.k.a. the pid of the task last used to run a > vCPU. When userspace is doing M:N scheduling of tasks to vCPUs, e.g. to > run SEV migration helper vCPUs during post-copy, the synchronize_rcu() > needed to change the PID associated with the vCPU can stall for hundreds > of milliseconds, which is problematic for latency sensitive post-copy > operations. > > In the directed yield path, do not acquire the lock if it's contended, > i.e. if the associated PID is changing, as that means the vCPU's task is > already running. > > Reported-by: Steve Rutherford <srutherford@xxxxxxxxxx> > Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx> > --- > arch/arm64/include/asm/kvm_host.h | 2 +- > include/linux/kvm_host.h | 3 ++- > virt/kvm/kvm_main.c | 32 +++++++++++++++++-------------- > 3 files changed, 21 insertions(+), 16 deletions(-) > > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h > index a33f5996ca9f..7199cb014806 100644 > --- a/arch/arm64/include/asm/kvm_host.h > +++ b/arch/arm64/include/asm/kvm_host.h > @@ -1115,7 +1115,7 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, > void kvm_arm_halt_guest(struct kvm *kvm); > void kvm_arm_resume_guest(struct kvm *kvm); > > -#define vcpu_has_run_once(vcpu) !!rcu_access_pointer((vcpu)->pid) > +#define vcpu_has_run_once(vcpu) (!!READ_ONCE((vcpu)->pid)) > > #ifndef __KVM_NVHE_HYPERVISOR__ > #define kvm_call_hyp_nvhe(f, ...) 
\ > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > index 689e8be873a7..d6f4e8b2b44c 100644 > --- a/include/linux/kvm_host.h > +++ b/include/linux/kvm_host.h > @@ -342,7 +342,8 @@ struct kvm_vcpu { > #ifndef __KVM_HAVE_ARCH_WQP > struct rcuwait wait; > #endif > - struct pid __rcu *pid; > + struct pid *pid; > + rwlock_t pid_lock; > int sigset_active; > sigset_t sigset; > unsigned int halt_poll_ns; > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c > index 91048a7ad3be..fabffd85fa34 100644 > --- a/virt/kvm/kvm_main.c > +++ b/virt/kvm/kvm_main.c > @@ -486,6 +486,7 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) > vcpu->kvm = kvm; > vcpu->vcpu_id = id; > vcpu->pid = NULL; > + rwlock_init(&vcpu->pid_lock); > #ifndef __KVM_HAVE_ARCH_WQP > rcuwait_init(&vcpu->wait); > #endif > @@ -513,7 +514,7 @@ static void kvm_vcpu_destroy(struct kvm_vcpu *vcpu) > * the vcpu->pid pointer, and at destruction time all file descriptors > * are already gone. 
> */ > - put_pid(rcu_dereference_protected(vcpu->pid, 1)); > + put_pid(vcpu->pid); > > free_page((unsigned long)vcpu->run); > kmem_cache_free(kvm_vcpu_cache, vcpu); > @@ -3930,15 +3931,17 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_kick); > > int kvm_vcpu_yield_to(struct kvm_vcpu *target) > { > - struct pid *pid; > struct task_struct *task = NULL; > int ret; > > - rcu_read_lock(); > - pid = rcu_dereference(target->pid); > - if (pid) > - task = get_pid_task(pid, PIDTYPE_PID); > - rcu_read_unlock(); > + if (!read_trylock(&target->pid_lock)) > + return 0; > + > + if (target->pid) > + task = get_pid_task(target->pid, PIDTYPE_PID); > + > + read_unlock(&target->pid_lock); > + > if (!task) > return 0; > ret = yield_to(task, 1); > @@ -4178,9 +4181,9 @@ static int vcpu_get_pid(void *data, u64 *val) > { > struct kvm_vcpu *vcpu = data; > > - rcu_read_lock(); > - *val = pid_nr(rcu_dereference(vcpu->pid)); > - rcu_read_unlock(); > + read_lock(&vcpu->pid_lock); > + *val = pid_nr(vcpu->pid); > + read_unlock(&vcpu->pid_lock); > return 0; > } > > @@ -4466,7 +4469,7 @@ static long kvm_vcpu_ioctl(struct file *filp, > r = -EINVAL; > if (arg) > goto out; > - oldpid = rcu_access_pointer(vcpu->pid); > + oldpid = vcpu->pid; Overall this patch looks correct, but this spot took me a moment, and I want to confirm. This skips the reader lock since writing only happens just below, under the vcpu lock, and we've already taken that lock? > if (unlikely(oldpid != task_pid(current))) { > /* The thread running this VCPU changed. */ > struct pid *newpid; > @@ -4476,9 +4479,10 @@ static long kvm_vcpu_ioctl(struct file *filp, > break; > > newpid = get_task_pid(current, PIDTYPE_PID); > - rcu_assign_pointer(vcpu->pid, newpid); > - if (oldpid) > - synchronize_rcu(); > + write_lock(&vcpu->pid_lock); > + vcpu->pid = newpid; > + write_unlock(&vcpu->pid_lock); > + > put_pid(oldpid); > } > vcpu->wants_to_run = !READ_ONCE(vcpu->run->immediate_exit__unsafe); > -- > 2.46.0.rc2.264.g509ed76dc8-goog >