On Tue, Aug 6, 2024 at 12:56 AM Sean Christopherson <seanjc@xxxxxxxxxx> wrote:
> +	if (unlikely(vcpu->vcpu_idx >= atomic_read(&kvm->online_vcpus)))
> +		return -EINVAL;

+1 to having the test _somewhere_ for async ioctls, there's so much that
can go wrong if a vcpu is not reachable by for_each_vcpu.

> 	/*
> 	 * Some architectures have vcpu ioctls that are asynchronous to vcpu
> 	 * execution; mutex_lock() would break them.
>
> The mutex approach, sans async ioctl support:

Async ioctls can be handled as you suggested above by checking vcpu_idx
against online_vcpus.  This mutex approach also removes the funky
lock/unlock to inform lockdep, which is a nice plus.

Paolo

> ---
>  virt/kvm/kvm_main.c | 28 +++++++++++++++++++---------
>  1 file changed, 19 insertions(+), 9 deletions(-)
>
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index d0788d0a72cc..0a9c390b18a3 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -4269,12 +4269,6 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id)
>
>  	mutex_lock(&kvm->lock);
>
> -#ifdef CONFIG_LOCKDEP
> -	/* Ensure that lockdep knows vcpu->mutex is taken *inside* kvm->lock */
> -	mutex_lock(&vcpu->mutex);
> -	mutex_unlock(&vcpu->mutex);
> -#endif
> -
>  	if (kvm_get_vcpu_by_id(kvm, id)) {
>  		r = -EEXIST;
>  		goto unlock_vcpu_destroy;
> @@ -4285,15 +4279,29 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id)
>  	if (r)
>  		goto unlock_vcpu_destroy;
>
> -	/* Now it's all set up, let userspace reach it */
> +	/*
> +	 * Now it's all set up, let userspace reach it.  Grab the vCPU's mutex
> +	 * so that userspace can't invoke vCPU ioctl()s until the vCPU is fully
> +	 * visible (per online_vcpus), e.g. so that KVM doesn't get tricked
> +	 * into a NULL-pointer dereference because KVM thinks the _current_
> +	 * vCPU doesn't exist.  As a bonus, taking vcpu->mutex ensures lockdep
> +	 * knows it's taken *inside* kvm->lock.
> +	 */
> +	mutex_lock(&vcpu->mutex);
>  	kvm_get_kvm(kvm);
>  	r = create_vcpu_fd(vcpu);
>  	if (r < 0)
>  		goto kvm_put_xa_release;
>
> +	/*
> +	 * xa_store() should never fail, see xa_reserve() above.  Leak the vCPU
> +	 * if the impossible happens, as userspace already has access to the
> +	 * vCPU, i.e. freeing the vCPU before userspace puts its file reference
> +	 * would trigger a use-after-free.
> +	 */
>  	if (KVM_BUG_ON(xa_store(&kvm->vcpu_array, vcpu->vcpu_idx, vcpu, 0), kvm)) {
> -		r = -EINVAL;
> -		goto kvm_put_xa_release;
> +		mutex_unlock(&vcpu->mutex);
> +		return -EINVAL;
>  	}
>
>  	/*
> @@ -4302,6 +4310,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id)
>  	 */
>  	smp_wmb();
>  	atomic_inc(&kvm->online_vcpus);
> +	mutex_unlock(&vcpu->mutex);
>
>  	mutex_unlock(&kvm->lock);
>  	kvm_arch_vcpu_postcreate(vcpu);
> @@ -4309,6 +4318,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id)
>  	return r;
>
>  kvm_put_xa_release:
> +	mutex_unlock(&vcpu->mutex);
>  	kvm_put_kvm_no_destroy(kvm);
>  	xa_release(&kvm->vcpu_array, vcpu->vcpu_idx);
>  unlock_vcpu_destroy:
>
> base-commit: 332d2c1d713e232e163386c35a3ba0c1b90df83f
> --
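
To illustrate the async-ioctl handling mentioned above (this is only a
sketch, not part of Sean's patch): the online_vcpus check could sit right
before the kvm_arch_vcpu_async_ioctl() call in kvm_vcpu_ioctl(), along
these lines.  The exact placement and the -EINVAL return value are
assumptions, not something settled in this thread.

	/*
	 * Reject all vCPU ioctls, including asynchronous ones, until the
	 * vCPU is reachable via for_each_vcpu(), i.e. until online_vcpus
	 * has been bumped at the end of kvm_vm_ioctl_create_vcpu().
	 */
	if (unlikely(vcpu->vcpu_idx >= atomic_read(&vcpu->kvm->online_vcpus)))
		return -EINVAL;

	/*
	 * Some architectures have vcpu ioctls that are asynchronous to vcpu
	 * execution; mutex_lock() would break them.
	 */
	r = kvm_arch_vcpu_async_ioctl(filp, ioctl, arg);
	if (r != -ENOIOCTLCMD)
		return r;

With that guard in place, an async ioctl can never observe a vCPU that is
installed in vcpu_array but not yet counted in online_vcpus, which is the
same window the vcpu->mutex trick closes for the synchronous ioctls.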