Steal time is the amount of CPU time needed by a guest virtual machine that is not provided by the host. Steal time occurs when the host allocates this CPU time elsewhere: for example, to another guest. Steal time can be enabled by adding the VM configuration option stealclock.enable = "TRUE". It is supported by VMs that run hardware version 13 or newer. This change introduces the VMware steal time infrastructure. The high level code (such as enabling, disabling and hot-plug routines) was derived from the KVM one. [Tomer: use READ_ONCE macros and add 32-bit guest support] Signed-off-by: Alexey Makhalov <amakhalov@xxxxxxxxxx> Co-developed-by: Tomer Zeltzer <tomerr90@xxxxxxxxx> Signed-off-by: Tomer Zeltzer <tomerr90@xxxxxxxxx> Reviewed-by: Thomas Hellstrom <thellstrom@xxxxxxxxxx> --- arch/x86/kernel/cpu/vmware.c | 197 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 197 insertions(+) diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index efb22fa76ba4..59459992ad47 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -25,6 +25,8 @@ #include <linux/init.h> #include <linux/export.h> #include <linux/clocksource.h> +#include <linux/cpu.h> +#include <linux/reboot.h> #include <asm/div64.h> #include <asm/x86_init.h> #include <asm/hypervisor.h> @@ -47,6 +49,11 @@ #define VMWARE_CMD_GETVCPU_INFO 68 #define VMWARE_CMD_LEGACY_X2APIC 3 #define VMWARE_CMD_VCPU_RESERVED 31 +#define VMWARE_CMD_STEALCLOCK 91 + +#define STEALCLOCK_NOT_AVAILABLE (-1) +#define STEALCLOCK_DISABLED 0 +#define STEALCLOCK_ENABLED 1 #define VMWARE_PORT(cmd, eax, ebx, ecx, edx) \ __asm__("inl (%%dx), %%eax" : \ @@ -86,6 +93,18 @@ } \ } while (0) +struct vmware_steal_time { + union { + uint64_t clock; /* stolen time counter in units of vtsc */ + struct { + /* only for little-endian */ + uint32_t clock_low; + uint32_t clock_high; + }; + }; + uint64_t reserved[7]; +}; + static unsigned long vmware_tsc_khz __ro_after_init; static u8 vmware_hypercall_mode 
__ro_after_init; @@ -104,6 +123,8 @@ static unsigned long vmware_get_tsc_khz(void) #ifdef CONFIG_PARAVIRT static struct cyc2ns_data vmware_cyc2ns __ro_after_init; static int vmw_sched_clock __initdata = 1; +static DEFINE_PER_CPU_DECRYPTED(struct vmware_steal_time, steal_time) __aligned(64); +static bool has_steal_clock; static __init int setup_vmw_sched_clock(char *s) { @@ -135,6 +156,163 @@ static void __init vmware_cyc2ns_setup(void) pr_info("using clock offset of %llu ns\n", d->cyc2ns_offset); } +static int vmware_cmd_stealclock(uint32_t arg1, uint32_t arg2) +{ + uint32_t result, info; + + asm volatile (VMWARE_HYPERCALL : + "=a"(result), + "=c"(info) : + "a"(VMWARE_HYPERVISOR_MAGIC), + "b"(0), + "c"(VMWARE_CMD_STEALCLOCK), + "d"(0), + "S"(arg1), + "D"(arg2) : + "memory"); + return result; +} + +static bool stealclock_enable(phys_addr_t pa) +{ + return vmware_cmd_stealclock(upper_32_bits(pa), + lower_32_bits(pa)) == STEALCLOCK_ENABLED; +} + +static int __stealclock_disable(void) +{ + return vmware_cmd_stealclock(0, 1); +} + +static void stealclock_disable(void) +{ + __stealclock_disable(); +} + +static bool vmware_is_stealclock_available(void) +{ + return __stealclock_disable() != STEALCLOCK_NOT_AVAILABLE; +} + +/** + * vmware_steal_clock() - read the per-cpu steal clock + * @cpu: the cpu number whose steal clock we want to read + * + * The function reads the steal clock if we are on a 64-bit system, otherwise + * reads it in parts, checking that the high part didn't change in the + * meantime. + * + * Return: + * The steal clock reading in ns. 
+ */ +static uint64_t vmware_steal_clock(int cpu) +{ + struct vmware_steal_time *steal = &per_cpu(steal_time, cpu); + uint64_t clock; + + if (IS_ENABLED(CONFIG_64BIT)) + clock = READ_ONCE(steal->clock); + else { + uint32_t initial_high, low, high; + + do { + initial_high = READ_ONCE(steal->clock_high); + /* Do not reorder initial_high and high readings */ + virt_rmb(); + low = READ_ONCE(steal->clock_low); + /* Keep low reading in between */ + virt_rmb(); + high = READ_ONCE(steal->clock_high); + } while (initial_high != high); + + clock = ((uint64_t)high << 32) | low; + } + + return mul_u64_u32_shr(clock, vmware_cyc2ns.cyc2ns_mul, + vmware_cyc2ns.cyc2ns_shift); +} + +static void vmware_register_steal_time(void) +{ + int cpu = smp_processor_id(); + struct vmware_steal_time *st = &per_cpu(steal_time, cpu); + + if (!has_steal_clock) + return; + + if (!stealclock_enable(slow_virt_to_phys(st))) { + has_steal_clock = false; + return; + } + + pr_info("vmware-stealtime: cpu %d, pa %llx\n", + cpu, (unsigned long long) slow_virt_to_phys(st)); +} + +static void vmware_disable_steal_time(void) +{ + if (!has_steal_clock) + return; + + stealclock_disable(); +} + +static void vmware_guest_cpu_init(void) +{ + if (has_steal_clock) + vmware_register_steal_time(); +} + +static void vmware_pv_guest_cpu_reboot(void *unused) +{ + vmware_disable_steal_time(); +} + +static int vmware_pv_reboot_notify(struct notifier_block *nb, + unsigned long code, void *unused) +{ + if (code == SYS_RESTART) + on_each_cpu(vmware_pv_guest_cpu_reboot, NULL, 1); + return NOTIFY_DONE; +} + +static struct notifier_block vmware_pv_reboot_nb = { + .notifier_call = vmware_pv_reboot_notify, +}; + +#ifdef CONFIG_SMP +static void __init vmware_smp_prepare_boot_cpu(void) +{ + vmware_guest_cpu_init(); + native_smp_prepare_boot_cpu(); +} + +static int vmware_cpu_online(unsigned int cpu) +{ + local_irq_disable(); + vmware_guest_cpu_init(); + local_irq_enable(); + return 0; +} + +static int 
vmware_cpu_down_prepare(unsigned int cpu) +{ + local_irq_disable(); + vmware_disable_steal_time(); + local_irq_enable(); + return 0; +} +#endif + +static __init int activate_jump_labels(void) +{ + if (has_steal_clock) + static_key_slow_inc(&paravirt_steal_enabled); + + return 0; +} +arch_initcall(activate_jump_labels); + static void __init vmware_paravirt_ops_setup(void) { pv_info.name = "VMware hypervisor"; @@ -148,6 +326,25 @@ static void __init vmware_paravirt_ops_setup(void) if (vmw_sched_clock) pv_ops.time.sched_clock = vmware_sched_clock; + if (vmware_is_stealclock_available()) { + has_steal_clock = true; + pv_ops.time.steal_clock = vmware_steal_clock; + + /* We use reboot notifier only to disable steal clock */ + register_reboot_notifier(&vmware_pv_reboot_nb); + +#ifdef CONFIG_SMP + smp_ops.smp_prepare_boot_cpu = + vmware_smp_prepare_boot_cpu; + if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, + "x86/vmware:online", + vmware_cpu_online, + vmware_cpu_down_prepare) < 0) + pr_err("vmware_guest: Failed to install cpu hotplug callbacks\n"); +#else + vmware_guest_cpu_init(); +#endif + } } #else #define vmware_paravirt_ops_setup() do {} while (0) -- 2.14.2