The patch below does not apply to the 4.19-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable@xxxxxxxxxxxxxxx>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y git checkout FETCH_HEAD git cherry-pick -x 2b01281273738bf2d6551da48d65db2df3f28998 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable@xxxxxxxxxxxxxxx>' --in-reply-to '16781189338715@xxxxxxxxx' --subject-prefix 'PATCH 4.19.y' HEAD^.. Possible dependencies: 2b0128127373 ("KVM: Register /dev/kvm as the _very_ last thing during initialization") baff59ccdc65 ("KVM: Pre-allocate cpumasks for kvm_make_all_cpus_request_except()") ae0946cd3601 ("KVM: Optimize kvm_make_vcpus_request_mask() a bit") 0bbc2ca8515f ("KVM: KVM: Use cpumask_available() to check for NULL cpumask when kicking vCPUs") 85b640450ddc ("KVM: Clean up benign vcpu->cpu data races when kicking vCPUs") e649b3f0188f ("KVM: x86: Fix APIC page invalidation race") 54163a346d4a ("KVM: Introduce kvm_make_all_cpus_request_except()") db5a95ec166f ("KVM: x86: remove set but not used variable 'called'") 7ee30bc132c6 ("KVM: x86: deliver KVM IOAPIC scan request to target vCPUs") dfcd66604c1c ("mm/mmu_notifier: convert user range->blockable to helper function") a3e0d41c2b1f ("mm/hmm: improve driver API to work and wait over a range") 73231612dc7c ("mm/hmm: improve and rename hmm_vma_fault() to hmm_range_fault()") 25f23a0c7127 ("mm/hmm: improve and rename hmm_vma_get_pfns() to hmm_range_snapshot()") 9f454612f602 ("mm/hmm: do not erase snapshot when a range is invalidated") 704f3f2cf63c ("mm/hmm: use reference counting for HMM struct") 484d9a844d0d ("drm/i915/userptr: Avoid struct_mutex recursion for mmu_invalidate_range_start") ac46d4f3c432 ("mm/mmu_notifier: use structure for 
invalidate_range_start/end calls v2") 5d6527a784f7 ("mm/mmu_notifier: use structure for invalidate_range_start/end callback") ec131b2d7fa6 ("mm/hmm: invalidate device page table at start of invalidation") 44532d4c591c ("mm/hmm: use a structure for update callback parameters") thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 2b01281273738bf2d6551da48d65db2df3f28998 Mon Sep 17 00:00:00 2001 From: Sean Christopherson <seanjc@xxxxxxxxxx> Date: Wed, 30 Nov 2022 23:08:45 +0000 Subject: [PATCH] KVM: Register /dev/kvm as the _very_ last thing during initialization Register /dev/kvm, i.e. expose KVM to userspace, only after all other setup has completed. Once /dev/kvm is exposed, userspace can start invoking KVM ioctls, creating VMs, etc... If userspace creates a VM before KVM is done with its configuration, bad things may happen, e.g. KVM will fail to properly migrate vCPU state if a VM is created before KVM has registered preemption notifiers. Cc: stable@xxxxxxxxxxxxxxx Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx> Message-Id: <20221130230934.1014142-2-seanjc@xxxxxxxxxx> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 13e88297f999..28a1a02f5228 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -5988,12 +5988,6 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, kvm_chardev_ops.owner = module; - r = misc_register(&kvm_dev); - if (r) { - pr_err("kvm: misc device register failed\n"); - goto out_unreg; - } - register_syscore_ops(&kvm_syscore_ops); kvm_preempt_ops.sched_in = kvm_sched_in; @@ -6002,11 +5996,24 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, kvm_init_debug(); r = kvm_vfio_ops_init(); - WARN_ON(r); + if (WARN_ON_ONCE(r)) + goto err_vfio; + + /* + * Registration _must_ be the very last thing done, as this exposes + * /dev/kvm to userspace, i.e. all infrastructure must be setup! 
+ */ + r = misc_register(&kvm_dev); + if (r) { + pr_err("kvm: misc device register failed\n"); + goto err_register; + } return 0; -out_unreg: +err_register: + kvm_vfio_ops_exit(); +err_vfio: kvm_async_pf_deinit(); out_free_4: for_each_possible_cpu(cpu) @@ -6032,8 +6039,14 @@ void kvm_exit(void) { int cpu; - debugfs_remove_recursive(kvm_debugfs_dir); + /* + * Note, unregistering /dev/kvm doesn't strictly need to come first, + * fops_get(), a.k.a. try_module_get(), prevents acquiring references + * to KVM while the module is being stopped. + */ misc_deregister(&kvm_dev); + + debugfs_remove_recursive(kvm_debugfs_dir); for_each_possible_cpu(cpu) free_cpumask_var(per_cpu(cpu_kick_mask, cpu)); kmem_cache_destroy(kvm_vcpu_cache);