Hi, Bibo, On Tue, Apr 30, 2024 at 9:45 AM Bibo Mao <maobibo@xxxxxxxxxxx> wrote: > > Percpu struct kvm_steal_time is added here, its size is 64 bytes and > also defined as 64 bytes, so that the whole structure is in one physical > page. > > When vcpu is onlined, function pv_enable_steal_time() is called. This > function will pass guest physical address of struct kvm_steal_time and > tells hypervisor to enable steal time. When vcpu is offline, physical > address is set as 0 and tells hypervisor to disable steal time. > > Here is output of vmstat on guest when there is workload on both host > and guest. It includes steal time stat information. > > procs -----------memory---------- -----io---- -system-- ------cpu----- > r b swpd free inact active bi bo in cs us sy id wa st > 15 1 0 7583616 184112 72208 20 0 162 52 31 6 43 0 20 > 17 0 0 7583616 184704 72192 0 0 6318 6885 5 60 8 5 22 > 16 0 0 7583616 185392 72144 0 0 1766 1081 0 49 0 1 50 > 16 0 0 7583616 184816 72304 0 0 6300 6166 4 62 12 2 20 > 18 0 0 7583632 184480 72240 0 0 2814 1754 2 58 4 1 35 > > Signed-off-by: Bibo Mao <maobibo@xxxxxxxxxxx> > --- > arch/loongarch/Kconfig | 11 +++ > arch/loongarch/include/asm/paravirt.h | 5 + > arch/loongarch/kernel/paravirt.c | 131 ++++++++++++++++++++++++++ > arch/loongarch/kernel/time.c | 2 + > 4 files changed, 149 insertions(+) > > diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig > index 0a1540a8853e..f3a03c33a052 100644 > --- a/arch/loongarch/Kconfig > +++ b/arch/loongarch/Kconfig > @@ -592,6 +592,17 @@ config PARAVIRT > over full virtualization. However, when run without a hypervisor > the kernel is theoretically slower and slightly larger. > > +config PARAVIRT_TIME_ACCOUNTING > + bool "Paravirtual steal time accounting" > + select PARAVIRT > + help > + Select this option to enable fine granularity task steal time > + accounting. Time spent executing other tasks in parallel with > + the current vCPU is discounted from the vCPU power. To account for > + that, there can be a small performance impact. > + > + If in doubt, say N here. > + Can we use a hidden selection manner, which means: config PARAVIRT_TIME_ACCOUNTING def_bool PARAVIRT Because I think we needn't give too many choices to users (and bring much more effort to test). PowerPC even hide all the PARAVIRT config options... see arch/powerpc/platforms/pseries/Kconfig Huacai > config ARCH_SUPPORTS_KEXEC > def_bool y > > diff --git a/arch/loongarch/include/asm/paravirt.h b/arch/loongarch/include/asm/paravirt.h > index 58f7b7b89f2c..fe27fb5e82b8 100644 > --- a/arch/loongarch/include/asm/paravirt.h > +++ b/arch/loongarch/include/asm/paravirt.h > @@ -17,11 +17,16 @@ static inline u64 paravirt_steal_clock(int cpu) > } > > int pv_ipi_init(void); > +int __init pv_time_init(void); > #else > static inline int pv_ipi_init(void) > { > return 0; > } > > +static inline int pv_time_init(void) > +{ > + return 0; > +} > #endif // CONFIG_PARAVIRT > #endif > diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c > index 9044ed62045c..3f83afc7e2d2 100644 > --- a/arch/loongarch/kernel/paravirt.c > +++ b/arch/loongarch/kernel/paravirt.c > @@ -5,10 +5,13 @@ > #include <linux/jump_label.h> > #include <linux/kvm_para.h> > #include <asm/paravirt.h> > +#include <linux/reboot.h> > #include <linux/static_call.h> > > struct static_key paravirt_steal_enabled; > struct static_key paravirt_steal_rq_enabled; > +static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64); > +static int has_steal_clock; > > static u64 native_steal_clock(int cpu) > { > @@ -17,6 +20,57 @@ static u64 native_steal_clock(int cpu) > > DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock); > > +static bool steal_acc = true; > +static int __init parse_no_stealacc(char *arg) > +{ > + steal_acc = false; > + return 0; > +} > +early_param("no-steal-acc", parse_no_stealacc); > + > +static u64 para_steal_clock(int cpu) > +{ > + u64 steal; > + struct kvm_steal_time *src; > + int version; > + > + src = &per_cpu(steal_time, cpu); > + do { > + > + version = src->version; > + /* Make sure that the version is read before the steal */ > + virt_rmb(); > + steal = src->steal; > + /* Make sure that the steal is read before the next version */ > + virt_rmb(); > + > + } while ((version & 1) || (version != src->version)); > + return steal; > +} > + > +static int pv_enable_steal_time(void) > +{ > + int cpu = smp_processor_id(); > + struct kvm_steal_time *st; > + unsigned long addr; > + > + if (!has_steal_clock) > + return -EPERM; > + > + st = &per_cpu(steal_time, cpu); > + addr = per_cpu_ptr_to_phys(st); > + > + /* The whole structure kvm_steal_time should be one page */ > + if (PFN_DOWN(addr) != PFN_DOWN(addr + sizeof(*st))) { > + pr_warn("Illegal PV steal time addr %lx\n", addr); > + return -EFAULT; > + } > + > + addr |= KVM_STEAL_PHYS_VALID; > + kvm_hypercall2(KVM_HCALL_FUNC_NOTIFY, KVM_FEATURE_STEAL_TIME, addr); > + return 0; > +} > + > #ifdef CONFIG_SMP > static void pv_send_ipi_single(int cpu, unsigned int action) > { > @@ -110,6 +164,32 @@ static void pv_init_ipi(void) > if (r < 0) > panic("SWI0 IRQ request failed\n"); > } > + > +static void pv_disable_steal_time(void) > +{ > + if (has_steal_clock) > + kvm_hypercall2(KVM_HCALL_FUNC_NOTIFY, KVM_FEATURE_STEAL_TIME, 0); > +} > + > +static int pv_time_cpu_online(unsigned int cpu) > +{ > + unsigned long flags; > + > + local_irq_save(flags); > + pv_enable_steal_time(); > + local_irq_restore(flags); > + return 0; > +} > + > +static int pv_time_cpu_down_prepare(unsigned int cpu) > +{ > + unsigned long flags; > + > + local_irq_save(flags); > + pv_disable_steal_time(); > + local_irq_restore(flags); > + return 0; > +} > #endif > > static bool kvm_para_available(void) > @@ -149,3 +229,54 @@ int __init pv_ipi_init(void) > > return 1; > } > + > +static void pv_cpu_reboot(void *unused) > +{ > + pv_disable_steal_time(); > +} > + > +static int pv_reboot_notify(struct notifier_block *nb, unsigned long code, > + void *unused) > +{ > + on_each_cpu(pv_cpu_reboot, NULL, 1); > + return NOTIFY_DONE; > +} > + > +static struct notifier_block pv_reboot_nb = { > + .notifier_call = pv_reboot_notify, > +}; > + > +int __init pv_time_init(void) > +{ > + int feature; > + > + if (!cpu_has_hypervisor) > + return 0; > + if (!kvm_para_available()) > + return 0; > + > + feature = read_cpucfg(CPUCFG_KVM_FEATURE); > + if (!(feature & KVM_FEATURE_STEAL_TIME)) > + return 0; > + > + has_steal_clock = 1; > + if (pv_enable_steal_time()) { > + has_steal_clock = 0; > + return 0; > + } > + > + register_reboot_notifier(&pv_reboot_nb); > + static_call_update(pv_steal_clock, para_steal_clock); > + static_key_slow_inc(¶virt_steal_enabled); > + if (steal_acc) > + static_key_slow_inc(¶virt_steal_rq_enabled); > + > +#ifdef CONFIG_SMP > + if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, > + "loongarch/pvi_time:online", > + pv_time_cpu_online, pv_time_cpu_down_prepare) < 0) > + pr_err("Failed to install cpu hotplug callbacks\n"); > +#endif > + pr_info("Using stolen time PV\n"); > + return 0; > +} > diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c > index fd5354f9be7c..46d7d40c87e3 100644 > --- a/arch/loongarch/kernel/time.c > +++ b/arch/loongarch/kernel/time.c > @@ -15,6 +15,7 @@ > > #include <asm/cpu-features.h> > #include <asm/loongarch.h> > +#include <asm/paravirt.h> > #include <asm/time.h> > > u64 cpu_clock_freq; > @@ -214,4 +215,5 @@ void __init time_init(void) > > constant_clockevent_init(); > constant_clocksource_init(); > + pv_time_init(); > } > -- > 2.39.3 > >