Move KVM's hardware enabling to vac.{h,c} as a first step towards building VAC and all of the system-wide virtualization support as a separate module. Defer moving arch code to future patches to keep the diff reasonable. No functional change intended. Signed-off-by: Anish Ghulati <aghulati@xxxxxxxxxx> --- virt/kvm/kvm_main.c | 197 +------------------------------------------- virt/kvm/vac.c | 177 +++++++++++++++++++++++++++++++++++++++ virt/kvm/vac.h | 26 ++++++ 3 files changed, 204 insertions(+), 196 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f585a159b4f5..fb50deaad3fd 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -59,6 +59,7 @@ #include "coalesced_mmio.h" #include "async_pf.h" #include "kvm_mm.h" +#include "vac.h" #include "vfio.h" #include <trace/events/ipi.h> @@ -140,8 +141,6 @@ static int kvm_no_compat_open(struct inode *inode, struct file *file) #define KVM_COMPAT(c) .compat_ioctl = kvm_no_compat_ioctl, \ .open = kvm_no_compat_open #endif -static int hardware_enable_all(void); -static void hardware_disable_all(void); static void kvm_io_bus_destroy(struct kvm_io_bus *bus); @@ -5167,200 +5166,6 @@ static struct miscdevice kvm_dev = { &kvm_chardev_ops, }; -#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING -__visible bool kvm_rebooting; -EXPORT_SYMBOL_GPL(kvm_rebooting); - -static DEFINE_PER_CPU(bool, hardware_enabled); -static int kvm_usage_count; - -static int __hardware_enable_nolock(void) -{ - if (__this_cpu_read(hardware_enabled)) - return 0; - - if (kvm_arch_hardware_enable()) { - pr_info("kvm: enabling virtualization on CPU%d failed\n", - raw_smp_processor_id()); - return -EIO; - } - - __this_cpu_write(hardware_enabled, true); - return 0; -} - -static void hardware_enable_nolock(void *failed) -{ - if (__hardware_enable_nolock()) - atomic_inc(failed); -} - -static int kvm_online_cpu(unsigned int cpu) -{ - int ret = 0; - - /* - * Abort the CPU online process if hardware virtualization cannot - * be enabled. Otherwise running VMs would encounter unrecoverable - * errors when scheduled to this CPU. - */ - mutex_lock(&kvm_lock); - if (kvm_usage_count) - ret = __hardware_enable_nolock(); - mutex_unlock(&kvm_lock); - return ret; -} - -static void hardware_disable_nolock(void *junk) -{ - /* - * Note, hardware_disable_all_nolock() tells all online CPUs to disable - * hardware, not just CPUs that successfully enabled hardware! - */ - if (!__this_cpu_read(hardware_enabled)) - return; - - kvm_arch_hardware_disable(); - - __this_cpu_write(hardware_enabled, false); -} - -static int kvm_offline_cpu(unsigned int cpu) -{ - mutex_lock(&kvm_lock); - if (kvm_usage_count) - hardware_disable_nolock(NULL); - mutex_unlock(&kvm_lock); - return 0; -} - -static void hardware_disable_all_nolock(void) -{ - BUG_ON(!kvm_usage_count); - - kvm_usage_count--; - if (!kvm_usage_count) - on_each_cpu(hardware_disable_nolock, NULL, 1); -} - -static void hardware_disable_all(void) -{ - cpus_read_lock(); - mutex_lock(&kvm_lock); - hardware_disable_all_nolock(); - mutex_unlock(&kvm_lock); - cpus_read_unlock(); -} - -static int hardware_enable_all(void) -{ - atomic_t failed = ATOMIC_INIT(0); - int r; - - /* - * Do not enable hardware virtualization if the system is going down. - * If userspace initiated a forced reboot, e.g. reboot -f, then it's - * possible for an in-flight KVM_CREATE_VM to trigger hardware enabling - * after kvm_reboot() is called. Note, this relies on system_state - * being set _before_ kvm_reboot(), which is why KVM uses a syscore ops - * hook instead of registering a dedicated reboot notifier (the latter - * runs before system_state is updated). - */ - if (system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF || - system_state == SYSTEM_RESTART) - return -EBUSY; - - /* - * When onlining a CPU, cpu_online_mask is set before kvm_online_cpu() - * is called, and so on_each_cpu() between them includes the CPU that - * is being onlined. As a result, hardware_enable_nolock() may get - * invoked before kvm_online_cpu(), which also enables hardware if the - * usage count is non-zero. Disable CPU hotplug to avoid attempting to - * enable hardware multiple times. - */ - cpus_read_lock(); - mutex_lock(&kvm_lock); - - r = 0; - - kvm_usage_count++; - if (kvm_usage_count == 1) { - on_each_cpu(hardware_enable_nolock, &failed, 1); - - if (atomic_read(&failed)) { - hardware_disable_all_nolock(); - r = -EBUSY; - } - } - - mutex_unlock(&kvm_lock); - cpus_read_unlock(); - - return r; -} - -static void kvm_shutdown(void) -{ - /* - * Disable hardware virtualization and set kvm_rebooting to indicate - * that KVM has asynchronously disabled hardware virtualization, i.e. - * that relevant errors and exceptions aren't entirely unexpected. - * Some flavors of hardware virtualization need to be disabled before - * transferring control to firmware (to perform shutdown/reboot), e.g. - * on x86, virtualization can block INIT interrupts, which are used by - * firmware to pull APs back under firmware control. Note, this path - * is used for both shutdown and reboot scenarios, i.e. neither name is - * 100% comprehensive. - */ - pr_info("kvm: exiting hardware virtualization\n"); - kvm_rebooting = true; - on_each_cpu(hardware_disable_nolock, NULL, 1); -} - -static int kvm_suspend(void) -{ - /* - * Secondary CPUs and CPU hotplug are disabled across the suspend/resume - * callbacks, i.e. no need to acquire kvm_lock to ensure the usage count - * is stable. Assert that kvm_lock is not held to ensure the system - * isn't suspended while KVM is enabling hardware. Hardware enabling - * can be preempted, but the task cannot be frozen until it has dropped - * all locks (userspace tasks are frozen via a fake signal). - */ - lockdep_assert_not_held(&kvm_lock); - lockdep_assert_irqs_disabled(); - - if (kvm_usage_count) - hardware_disable_nolock(NULL); - return 0; -} - -static void kvm_resume(void) -{ - lockdep_assert_not_held(&kvm_lock); - lockdep_assert_irqs_disabled(); - - if (kvm_usage_count) - WARN_ON_ONCE(__hardware_enable_nolock()); -} - -static struct syscore_ops kvm_syscore_ops = { - .suspend = kvm_suspend, - .resume = kvm_resume, - .shutdown = kvm_shutdown, -}; -#else /* CONFIG_KVM_GENERIC_HARDWARE_ENABLING */ -static int hardware_enable_all(void) -{ - return 0; -} - -static void hardware_disable_all(void) -{ - -} -#endif /* CONFIG_KVM_GENERIC_HARDWARE_ENABLING */ - static void kvm_iodevice_destructor(struct kvm_io_device *dev) { if (dev->ops->destructor) diff --git a/virt/kvm/vac.c b/virt/kvm/vac.c index 18d2ae7d3e47..ff034a53af50 100644 --- a/virt/kvm/vac.c +++ b/virt/kvm/vac.c @@ -1,3 +1,180 @@ // SPDX-License-Identifier: GPL-2.0-only #include "vac.h" + +#include <linux/cpu.h> +#include <linux/percpu.h> +#include <linux/mutex.h> + +#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING +DEFINE_MUTEX(vac_lock); + +__visible bool kvm_rebooting; +EXPORT_SYMBOL_GPL(kvm_rebooting); + +static DEFINE_PER_CPU(bool, hardware_enabled); +static int kvm_usage_count; + +static int __hardware_enable_nolock(void) +{ + if (__this_cpu_read(hardware_enabled)) + return 0; + + if (kvm_arch_hardware_enable()) { + pr_info("kvm: enabling virtualization on CPU%d failed\n", + raw_smp_processor_id()); + return -EIO; + } + + __this_cpu_write(hardware_enabled, true); + return 0; +} + +static void hardware_enable_nolock(void *failed) +{ + if (__hardware_enable_nolock()) + atomic_inc(failed); +} + +int kvm_online_cpu(unsigned int cpu) +{ + int ret = 0; + + /* + * Abort the CPU online process if hardware virtualization cannot + * be enabled. Otherwise running VMs would encounter unrecoverable + * errors when scheduled to this CPU. + */ + mutex_lock(&vac_lock); + if (kvm_usage_count) + ret = __hardware_enable_nolock(); + mutex_unlock(&vac_lock); + return ret; +} + +static void hardware_disable_nolock(void *junk) +{ + /* + * Note, hardware_disable_all_nolock() tells all online CPUs to disable + * hardware, not just CPUs that successfully enabled hardware! + */ + if (!__this_cpu_read(hardware_enabled)) + return; + + kvm_arch_hardware_disable(); + + __this_cpu_write(hardware_enabled, false); +} + +int kvm_offline_cpu(unsigned int cpu) +{ + mutex_lock(&vac_lock); + if (kvm_usage_count) + hardware_disable_nolock(NULL); + mutex_unlock(&vac_lock); + return 0; +} + +static void hardware_disable_all_nolock(void) +{ + BUG_ON(!kvm_usage_count); + + kvm_usage_count--; + if (!kvm_usage_count) + on_each_cpu(hardware_disable_nolock, NULL, 1); +} + +void hardware_disable_all(void) +{ + cpus_read_lock(); + mutex_lock(&vac_lock); + hardware_disable_all_nolock(); + mutex_unlock(&vac_lock); + cpus_read_unlock(); +} + +int hardware_enable_all(void) +{ + atomic_t failed = ATOMIC_INIT(0); + int r = 0; + + /* + * When onlining a CPU, cpu_online_mask is set before kvm_online_cpu() + * is called, and so on_each_cpu() between them includes the CPU that + * is being onlined. As a result, hardware_enable_nolock() may get + * invoked before kvm_online_cpu(), which also enables hardware if the + * usage count is non-zero. Disable CPU hotplug to avoid attempting to + * enable hardware multiple times. + */ + cpus_read_lock(); + mutex_lock(&vac_lock); + + kvm_usage_count++; + if (kvm_usage_count == 1) { + on_each_cpu(hardware_enable_nolock, &failed, 1); + + if (atomic_read(&failed)) { + hardware_disable_all_nolock(); + r = -EBUSY; + } + } + + mutex_unlock(&vac_lock); + cpus_read_unlock(); + + return r; +} + +static int kvm_reboot(struct notifier_block *notifier, unsigned long val, + void *v) +{ + /* + * Some (well, at least mine) BIOSes hang on reboot if + * in vmx root mode. + * + * And Intel TXT required VMX off for all cpu when system shutdown. + */ + pr_info("kvm: exiting hardware virtualization\n"); + kvm_rebooting = true; + on_each_cpu(hardware_disable_nolock, NULL, 1); + return NOTIFY_OK; +} + +static int kvm_suspend(void) +{ + /* + * Secondary CPUs and CPU hotplug are disabled across the suspend/resume + * callbacks, i.e. no need to acquire vac_lock to ensure the usage count + * is stable. Assert that vac_lock is not held to ensure the system + * isn't suspended while KVM is enabling hardware. Hardware enabling + * can be preempted, but the task cannot be frozen until it has dropped + * all locks (userspace tasks are frozen via a fake signal). + */ + lockdep_assert_not_held(&vac_lock); + lockdep_assert_irqs_disabled(); + + if (kvm_usage_count) + hardware_disable_nolock(NULL); + return 0; +} + +static void kvm_resume(void) +{ + lockdep_assert_not_held(&vac_lock); + lockdep_assert_irqs_disabled(); + + if (kvm_usage_count) + WARN_ON_ONCE(__hardware_enable_nolock()); +} + +struct notifier_block kvm_reboot_notifier = { + .notifier_call = kvm_reboot, + .priority = 0, +}; + +struct syscore_ops kvm_syscore_ops = { + .suspend = kvm_suspend, + .resume = kvm_resume, +}; + +#endif diff --git a/virt/kvm/vac.h b/virt/kvm/vac.h index 8f7123a916c5..aed178a16bdb 100644 --- a/virt/kvm/vac.h +++ b/virt/kvm/vac.h @@ -3,4 +3,30 @@ #ifndef __KVM_VAC_H__ #define __KVM_VAC_H__ +#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING + +#include <linux/kvm_host.h> +#include <linux/syscore_ops.h> + +int kvm_online_cpu(unsigned int cpu); +int kvm_offline_cpu(unsigned int cpu); +void hardware_disable_all(void); +int hardware_enable_all(void); + +extern struct notifier_block kvm_reboot_notifier; + +extern struct syscore_ops kvm_syscore_ops; + +#else /* CONFIG_KVM_GENERIC_HARDWARE_ENABLING */ +static inline int hardware_enable_all(void) +{ + return 0; +} + +static inline void hardware_disable_all(void) +{ + +} +#endif /* CONFIG_KVM_GENERIC_HARDWARE_ENABLING */ + #endif -- 2.42.0.869.gea05f2083d-goog