On Thu, Dec 14, 2023 at 3:45 PM Andrew Jones <ajones@xxxxxxxxxxxxxxxx> wrote:
>
> When the SBI STA extension exists we can use it to implement
> paravirt steal-time support. Fill in the empty pv-time functions
> with an SBI STA implementation and add the Kconfig knobs allowing
> it to be enabled.
>
> Signed-off-by: Andrew Jones <ajones@xxxxxxxxxxxxxxxx>
> ---
>  arch/riscv/Kconfig           | 19 ++++++++++
>  arch/riscv/kernel/paravirt.c | 67 ++++++++++++++++++++++++++++++++++--
>  2 files changed, 83 insertions(+), 3 deletions(-)
>
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index 95a2a06acc6a..b99fd8129edf 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -724,6 +724,25 @@ config COMPAT
>
>  	  If you want to execute 32-bit userspace applications, say Y.
>
> +config PARAVIRT
> +	bool "Enable paravirtualization code"
> +	depends on RISCV_SBI
> +	help
> +	  This changes the kernel so it can modify itself when it is run
> +	  under a hypervisor, potentially improving performance significantly
> +	  over full virtualization.
> +
> +config PARAVIRT_TIME_ACCOUNTING
> +	bool "Paravirtual steal time accounting"
> +	depends on PARAVIRT
> +	help
> +	  Select this option to enable fine granularity task steal time
> +	  accounting. Time spent executing other tasks in parallel with
> +	  the current vCPU is discounted from the vCPU power. To account for
> +	  that, there can be a small performance impact.
> +
> +	  If in doubt, say N here.
> +
>  config RELOCATABLE
>  	bool "Build a relocatable kernel"
>  	depends on MMU && 64BIT && !XIP_KERNEL
> diff --git a/arch/riscv/kernel/paravirt.c b/arch/riscv/kernel/paravirt.c
> index 141dbcc36fa2..b09dfd81bcd2 100644
> --- a/arch/riscv/kernel/paravirt.c
> +++ b/arch/riscv/kernel/paravirt.c
> @@ -6,12 +6,21 @@
>  #define pr_fmt(fmt) "riscv-pv: " fmt
>
>  #include <linux/cpuhotplug.h>
> +#include <linux/compiler.h>
> +#include <linux/errno.h>
>  #include <linux/init.h>
>  #include <linux/jump_label.h>
> +#include <linux/kconfig.h>
> +#include <linux/kernel.h>
> +#include <linux/percpu-defs.h>
>  #include <linux/printk.h>
>  #include <linux/static_call.h>
>  #include <linux/types.h>
>
> +#include <asm/barrier.h>
> +#include <asm/page.h>
> +#include <asm/sbi.h>
> +
>  struct static_key paravirt_steal_enabled;
>  struct static_key paravirt_steal_rq_enabled;
>
> @@ -31,24 +40,76 @@ static int __init parse_no_stealacc(char *arg)
>
>  early_param("no-steal-acc", parse_no_stealacc);
>
> +DEFINE_PER_CPU(struct sbi_sta_struct, steal_time) __aligned(64);
> +
>  static bool __init has_pv_steal_clock(void)
>  {
> +	if (sbi_spec_version >= sbi_mk_version(2, 0) &&
> +	    sbi_probe_extension(SBI_EXT_STA) > 0) {
> +		pr_info("SBI STA extension detected\n");
> +		return true;
> +	}
> +
>  	return false;
>  }
>
> -static int pv_time_cpu_online(unsigned int cpu)
> +static int sbi_sta_steal_time_set_shmem(unsigned long lo, unsigned long hi,
> +					 unsigned long flags)
>  {
> +	struct sbiret ret;
> +
> +	ret = sbi_ecall(SBI_EXT_STA, SBI_EXT_STA_STEAL_TIME_SET_SHMEM,
> +			lo, hi, flags, 0, 0, 0);
> +	if (ret.error) {
> +		if (lo == SBI_STA_SHMEM_DISABLE && hi == SBI_STA_SHMEM_DISABLE)
> +			pr_warn("Failed to disable steal-time shmem");
> +		else
> +			pr_warn("Failed to set steal-time shmem");
> +		return sbi_err_map_linux_errno(ret.error);
> +	}
> +
>  	return 0;
>  }
>
> +static int pv_time_cpu_online(unsigned int cpu)
> +{
> +	struct sbi_sta_struct *st = this_cpu_ptr(&steal_time);
> +	phys_addr_t pa = __pa(st);
> +	unsigned long lo = (unsigned long)pa;
> +	unsigned long hi = IS_ENABLED(CONFIG_32BIT) ? upper_32_bits((u64)pa) : 0;
> +
> +	return sbi_sta_steal_time_set_shmem(lo, hi, 0);
> +}
> +
>  static int pv_time_cpu_down_prepare(unsigned int cpu)
>  {
> -	return 0;
> +	return sbi_sta_steal_time_set_shmem(SBI_STA_SHMEM_DISABLE,
> +					    SBI_STA_SHMEM_DISABLE, 0);
>  }
>
>  static u64 pv_time_steal_clock(int cpu)
>  {
> -	return 0;
> +	struct sbi_sta_struct *st = per_cpu_ptr(&steal_time, cpu);
> +	u32 sequence;
> +	u64 steal;
> +
> +	if (IS_ENABLED(CONFIG_32BIT)) {
> +		/*
> +		 * Check the sequence field before and after reading the steal
> +		 * field. Repeat the read if it is different or odd.
> +		 */
> +		do {
> +			sequence = READ_ONCE(st->sequence);
> +			virt_rmb();
> +			steal = READ_ONCE(st->steal);
> +			virt_rmb();
> +		} while ((le32_to_cpu(sequence) & 1) ||
> +			 sequence != READ_ONCE(st->sequence));

Actually, we should be doing this sequence check for both RV64 and
RV32, because for RV64 too the steal time value is valid only when
the sequence number is even.
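Something along the lines of the untested sketch below would cover both
cases. It only reuses the names already in your patch (steal_time,
struct sbi_sta_struct, virt_rmb()) and simply drops the CONFIG_32BIT
special case, so take it as an illustration rather than a tested
replacement:

static u64 pv_time_steal_clock(int cpu)
{
	struct sbi_sta_struct *st = per_cpu_ptr(&steal_time, cpu);
	u32 sequence;
	u64 steal;

	/*
	 * Check the sequence field before and after reading the steal
	 * field, on RV32 and RV64 alike. Retry while the sequence is
	 * odd (an update is in progress) or changed under us.
	 */
	do {
		sequence = READ_ONCE(st->sequence);
		virt_rmb();
		steal = READ_ONCE(st->steal);
		virt_rmb();
	} while ((le32_to_cpu(sequence) & 1) ||
		 sequence != READ_ONCE(st->sequence));

	return le64_to_cpu(steal);
}

That also leaves a single code path shared by RV32 and RV64, so both
configurations exercise the same logic.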
> +	} else {
> +		steal = READ_ONCE(st->steal);
> +	}
> +
> +	return le64_to_cpu(steal);
>  }
>
>  int __init pv_time_init(void)
> --
> 2.43.0
>

Regards,
Anup