Hi, Bibo, On Sun, Apr 28, 2024 at 6:05 PM Bibo Mao <maobibo@xxxxxxxxxxx> wrote: > > PARAVIRT option and pv ipi is added on guest kernel side, function > pv_ipi_init() is to add ipi sending and ipi receiving hooks. This function > firstly checks whether system runs on VM mode. If kernel runs on VM mode, > it will call function kvm_para_available() to detect current hypervisor > type. Now only KVM type detection is supported, the paravirt function can > work only if current hypervisor type is KVM, since there is only KVM > supported on LoongArch now. > > PV IPI uses virtual IPI sender and virtual IPI receiver function. With > virtual IPI sender, ipi message is stored in DDR memory rather than > emulated HW. IPI multicast is supported, and 128 vcpus can receive IPIs > at the same time like X86 KVM method. Hypercall method is used for IPI > sending. > > With virtual IPI receiver, HW SW0 is used rather than real IPI HW. Since > VCPU has separate HW SW0 like HW timer, there is no trap in IPI interrupt > acknowledge. And IPI message is stored in DDR, no trap in get IPI message. 
> > Signed-off-by: Bibo Mao <maobibo@xxxxxxxxxxx> > --- > arch/loongarch/Kconfig | 9 ++ > arch/loongarch/include/asm/hardirq.h | 1 + > arch/loongarch/include/asm/paravirt.h | 27 ++++ > .../include/asm/paravirt_api_clock.h | 1 + > arch/loongarch/kernel/Makefile | 1 + > arch/loongarch/kernel/irq.c | 2 +- > arch/loongarch/kernel/paravirt.c | 151 ++++++++++++++++++ > arch/loongarch/kernel/smp.c | 4 +- > 8 files changed, 194 insertions(+), 2 deletions(-) > create mode 100644 arch/loongarch/include/asm/paravirt.h > create mode 100644 arch/loongarch/include/asm/paravirt_api_clock.h > create mode 100644 arch/loongarch/kernel/paravirt.c > > diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig > index 54ad04dacdee..0a1540a8853e 100644 > --- a/arch/loongarch/Kconfig > +++ b/arch/loongarch/Kconfig > @@ -583,6 +583,15 @@ config CPU_HAS_PREFETCH > bool > default y > > +config PARAVIRT > + bool "Enable paravirtualization code" > + depends on AS_HAS_LVZ_EXTENSION > + help > + This changes the kernel so it can modify itself when it is run > + under a hypervisor, potentially improving performance significantly > + over full virtualization. However, when run without a hypervisor > + the kernel is theoretically slower and slightly larger. 
> + > config ARCH_SUPPORTS_KEXEC > def_bool y > > diff --git a/arch/loongarch/include/asm/hardirq.h b/arch/loongarch/include/asm/hardirq.h > index 9f0038e19c7f..b26d596a73aa 100644 > --- a/arch/loongarch/include/asm/hardirq.h > +++ b/arch/loongarch/include/asm/hardirq.h > @@ -21,6 +21,7 @@ enum ipi_msg_type { > typedef struct { > unsigned int ipi_irqs[NR_IPI]; > unsigned int __softirq_pending; > + atomic_t message ____cacheline_aligned_in_smp; > } ____cacheline_aligned irq_cpustat_t; > > DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); > diff --git a/arch/loongarch/include/asm/paravirt.h b/arch/loongarch/include/asm/paravirt.h > new file mode 100644 > index 000000000000..58f7b7b89f2c > --- /dev/null > +++ b/arch/loongarch/include/asm/paravirt.h > @@ -0,0 +1,27 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +#ifndef _ASM_LOONGARCH_PARAVIRT_H > +#define _ASM_LOONGARCH_PARAVIRT_H > + > +#ifdef CONFIG_PARAVIRT > +#include <linux/static_call_types.h> > +struct static_key; > +extern struct static_key paravirt_steal_enabled; > +extern struct static_key paravirt_steal_rq_enabled; > + > +u64 dummy_steal_clock(int cpu); > +DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock); > + > +static inline u64 paravirt_steal_clock(int cpu) > +{ > + return static_call(pv_steal_clock)(cpu); > +} > + > +int pv_ipi_init(void); > +#else > +static inline int pv_ipi_init(void) > +{ > + return 0; > +} > + > +#endif // CONFIG_PARAVIRT > +#endif > diff --git a/arch/loongarch/include/asm/paravirt_api_clock.h b/arch/loongarch/include/asm/paravirt_api_clock.h > new file mode 100644 > index 000000000000..65ac7cee0dad > --- /dev/null > +++ b/arch/loongarch/include/asm/paravirt_api_clock.h > @@ -0,0 +1 @@ > +#include <asm/paravirt.h> > diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile > index 3a7620b66bc6..c9bfeda89e40 100644 > --- a/arch/loongarch/kernel/Makefile > +++ b/arch/loongarch/kernel/Makefile > @@ -51,6 +51,7 @@ obj-$(CONFIG_MODULES) += module.o 
module-sections.o > obj-$(CONFIG_STACKTRACE) += stacktrace.o > > obj-$(CONFIG_PROC_FS) += proc.o > +obj-$(CONFIG_PARAVIRT) += paravirt.o > > obj-$(CONFIG_SMP) += smp.o > > diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c > index ce36897d1e5a..4863e6c1b739 100644 > --- a/arch/loongarch/kernel/irq.c > +++ b/arch/loongarch/kernel/irq.c > @@ -113,5 +113,5 @@ void __init init_IRQ(void) > per_cpu(irq_stack, i), per_cpu(irq_stack, i) + IRQ_STACK_SIZE); > } > > - set_csr_ecfg(ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC); > + set_csr_ecfg(ECFGF_SIP0 | ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC); > } > diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c > new file mode 100644 > index 000000000000..9044ed62045c > --- /dev/null > +++ b/arch/loongarch/kernel/paravirt.c > @@ -0,0 +1,151 @@ > +// SPDX-License-Identifier: GPL-2.0 > +#include <linux/export.h> > +#include <linux/types.h> > +#include <linux/interrupt.h> > +#include <linux/jump_label.h> > +#include <linux/kvm_para.h> > +#include <asm/paravirt.h> > +#include <linux/static_call.h> > + > +struct static_key paravirt_steal_enabled; > +struct static_key paravirt_steal_rq_enabled; > + > +static u64 native_steal_clock(int cpu) > +{ > + return 0; > +} > + > +DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock); > + > +#ifdef CONFIG_SMP > +static void pv_send_ipi_single(int cpu, unsigned int action) > +{ > + unsigned int min, old; > + irq_cpustat_t *info = &per_cpu(irq_stat, cpu); > + > + old = atomic_fetch_or(BIT(action), &info->message); > + if (old) > + return; > + > + min = cpu_logical_map(cpu); > + kvm_hypercall3(KVM_HCALL_FUNC_PV_IPI, 1, 0, min); > +} > + > +#define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG) > +static void pv_send_ipi_mask(const struct cpumask *mask, unsigned int action) > +{ > + unsigned int cpu, i, min = 0, max = 0, old; > + __uint128_t bitmap = 0; > + irq_cpustat_t *info; > + > + if (cpumask_empty(mask)) > + return; > + > + 
action = BIT(action); > + for_each_cpu(i, mask) { > + info = &per_cpu(irq_stat, i); > + old = atomic_fetch_or(action, &info->message); > + if (old) > + continue; > + > + cpu = cpu_logical_map(i); > + if (!bitmap) { > + min = max = cpu; > + } else if (cpu > min && cpu < min + KVM_IPI_CLUSTER_SIZE) { > + max = cpu > max ? cpu : max; > + } else if (cpu < min && (max - cpu) < KVM_IPI_CLUSTER_SIZE) { > + bitmap <<= min - cpu; > + min = cpu; > + } else { > + /* > + * Physical cpuid is sorted in ascending order ascend > + * for the next mask calculation, send IPI here > + * directly and skip the remaining cpus > + */ > + kvm_hypercall3(KVM_HCALL_FUNC_PV_IPI, > + (unsigned long)bitmap, > + (unsigned long)(bitmap >> BITS_PER_LONG), min); > + min = max = cpu; > + bitmap = 0; > + } I have changed the logic and comments while applying this patch; please double-check whether they are correct. Huacai > + __set_bit(cpu - min, (unsigned long *)&bitmap); > + } > + > + if (bitmap) > + kvm_hypercall3(KVM_HCALL_FUNC_PV_IPI, (unsigned long)bitmap, > + (unsigned long)(bitmap >> BITS_PER_LONG), min); > +} > + > +static irqreturn_t loongson_do_swi(int irq, void *dev) > +{ > + irq_cpustat_t *info; > + long action; > + > + /* Clear swi interrupt */ > + clear_csr_estat(1 << INT_SWI0); > + info = this_cpu_ptr(&irq_stat); > + action = atomic_xchg(&info->message, 0); > + if (action & SMP_CALL_FUNCTION) { > + generic_smp_call_function_interrupt(); > + info->ipi_irqs[IPI_CALL_FUNCTION]++; > + } > + > + if (action & SMP_RESCHEDULE) { > + scheduler_ipi(); > + info->ipi_irqs[IPI_RESCHEDULE]++; > + } > + > + return IRQ_HANDLED; > +} > + > +static void pv_init_ipi(void) > +{ > + int r, swi0; > + > + swi0 = get_percpu_irq(INT_SWI0); > + if (swi0 < 0) > + panic("SWI0 IRQ mapping failed\n"); > + irq_set_percpu_devid(swi0); > + r = request_percpu_irq(swi0, loongson_do_swi, "SWI0", &irq_stat); > + if (r < 0) > + panic("SWI0 IRQ request failed\n"); > +} > +#endif > + > +static bool kvm_para_available(void) > +{ > + 
static int hypervisor_type; > + int config; > + > + if (!hypervisor_type) { > + config = read_cpucfg(CPUCFG_KVM_SIG); > + if (!memcmp(&config, KVM_SIGNATURE, 4)) > + hypervisor_type = HYPERVISOR_KVM; > + } > + > + return hypervisor_type == HYPERVISOR_KVM; > +} > + > +int __init pv_ipi_init(void) > +{ > + int feature; > + > + if (!cpu_has_hypervisor) > + return 0; > + if (!kvm_para_available()) > + return 0; > + > + /* > + * check whether KVM hypervisor supports pv_ipi or not > + */ > + feature = read_cpucfg(CPUCFG_KVM_FEATURE); > +#ifdef CONFIG_SMP > + if (feature & KVM_FEATURE_PV_IPI) { > + smp_ops.init_ipi = pv_init_ipi; > + smp_ops.send_ipi_single = pv_send_ipi_single; > + smp_ops.send_ipi_mask = pv_send_ipi_mask; > + } > +#endif > + > + return 1; > +} > diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c > index 1fce775be4f6..9eff7aa4c552 100644 > --- a/arch/loongarch/kernel/smp.c > +++ b/arch/loongarch/kernel/smp.c > @@ -29,6 +29,7 @@ > #include <asm/loongson.h> > #include <asm/mmu_context.h> > #include <asm/numa.h> > +#include <asm/paravirt.h> > #include <asm/processor.h> > #include <asm/setup.h> > #include <asm/time.h> > @@ -309,6 +310,7 @@ void __init loongson_smp_setup(void) > cpu_data[0].core = cpu_logical_map(0) % loongson_sysconf.cores_per_package; > cpu_data[0].package = cpu_logical_map(0) / loongson_sysconf.cores_per_package; > > + pv_ipi_init(); > iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_EN); > pr_info("Detected %i available CPU(s)\n", loongson_sysconf.nr_cpus); > } > @@ -352,7 +354,7 @@ void loongson_boot_secondary(int cpu, struct task_struct *idle) > void loongson_init_secondary(void) > { > unsigned int cpu = smp_processor_id(); > - unsigned int imask = ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | > + unsigned int imask = ECFGF_SIP0 | ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | > ECFGF_IPI | ECFGF_PMC | ECFGF_TIMER; > > change_csr_ecfg(ECFG0_IM, imask); > -- > 2.39.3 >