Hi, Bibo,

On Thu, Feb 22, 2024 at 11:28 AM Bibo Mao <maobibo@xxxxxxxxxxx> wrote:
>
> On LoongArch systems, the IPI hardware uses iocsr registers: there is
> one iocsr register access when sending an IPI, and two iocsr accesses
> when receiving one (in the IPI interrupt handler). In VM mode, every
> iocsr access causes the VM to trap into the hypervisor, so a single
> hardware IPI notification costs three traps.
>
> This patch adds PV IPI for VMs. The hypercall instruction is used on
> the sender side, and the hypervisor injects an SWI into the destination
> vcpu. In the SWI interrupt handler, only the ESTAT CSR register is
> written to clear the irq, and ESTAT CSR accesses do not trap into the
> hypervisor. So with PV IPI there is one trap on the sender side and no
> trap on the receiver side: an IPI notification costs only one trap.
>
> This patch also adds IPI multicast support, using a method similar to
> x86's. With multicast support, an IPI notification can be sent to up
> to 128 vcpus at a time, which greatly reduces the number of traps into
> the hypervisor.
>
> Signed-off-by: Bibo Mao <maobibo@xxxxxxxxxxx>
> ---
>  arch/loongarch/include/asm/hardirq.h   |   1 +
>  arch/loongarch/include/asm/kvm_host.h  |   1 +
>  arch/loongarch/include/asm/kvm_para.h  | 123 +++++++++++++++++++++++++
>  arch/loongarch/include/asm/loongarch.h |   1 +
>  arch/loongarch/kernel/irq.c            |   2 +-
>  arch/loongarch/kernel/paravirt.c       | 112 ++++++++++++++++++++++
>  arch/loongarch/kernel/setup.c          |   1 +
>  arch/loongarch/kernel/smp.c            |   2 +-
>  arch/loongarch/kvm/exit.c              |  73 ++++++++++++++-
>  arch/loongarch/kvm/vcpu.c              |   1 +
>  10 files changed, 313 insertions(+), 4 deletions(-)
>
> diff --git a/arch/loongarch/include/asm/hardirq.h b/arch/loongarch/include/asm/hardirq.h
> index 9f0038e19c7f..b26d596a73aa 100644
> --- a/arch/loongarch/include/asm/hardirq.h
> +++ b/arch/loongarch/include/asm/hardirq.h
> @@ -21,6 +21,7 @@ enum ipi_msg_type {
>  typedef struct {
>          unsigned int ipi_irqs[NR_IPI];
>          unsigned int __softirq_pending;
> +        atomic_t message ____cacheline_aligned_in_smp;
>  } ____cacheline_aligned irq_cpustat_t;
>
>  DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
> diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
> index 3ba16ef1fe69..0b96c6303cf7 100644
> --- a/arch/loongarch/include/asm/kvm_host.h
> +++ b/arch/loongarch/include/asm/kvm_host.h
> @@ -43,6 +43,7 @@ struct kvm_vcpu_stat {
>          u64 idle_exits;
>          u64 cpucfg_exits;
>          u64 signal_exits;
> +        u64 hypercall_exits;
>  };
>
>  #define KVM_MEM_HUGEPAGE_CAPABLE  (1UL << 0)
> diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h
> index af5d677a9052..a82bffbbf8a1 100644
> --- a/arch/loongarch/include/asm/kvm_para.h
> +++ b/arch/loongarch/include/asm/kvm_para.h
> @@ -8,6 +8,9 @@
>  #define HYPERVISOR_KVM            1
>  #define HYPERVISOR_VENDOR_SHIFT   8
>  #define HYPERCALL_CODE(vendor, code)  ((vendor << HYPERVISOR_VENDOR_SHIFT) + code)
> +#define KVM_HCALL_CODE_PV_SERVICE 0
> +#define KVM_HCALL_PV_SERVICE      HYPERCALL_CODE(HYPERVISOR_KVM, KVM_HCALL_CODE_PV_SERVICE)
> +#define KVM_HCALL_FUNC_PV_IPI     1
>
>  /*
>   * LoongArch hypercall return code
> @@ -16,6 +19,126 @@
>  #define KVM_HCALL_INVALID_CODE       -1UL
>  #define KVM_HCALL_INVALID_PARAMETER  -2UL
>
> +/*
> + * Hypercall interface for KVM hypervisor
> + *
> + * a0: function identifier
> + * a1-a6: args
> + * The return value is placed in v0.
> + * Up to six arguments are passed in a1, a2, a3, a4, a5, a6.
> + */
> +static __always_inline long kvm_hypercall(u64 fid)
> +{
> +        register long ret asm("v0");
> +        register unsigned long fun asm("a0") = fid;
> +
> +        __asm__ __volatile__(
> +                "hvcl "__stringify(KVM_HCALL_PV_SERVICE)
> +                : "=r" (ret)
> +                : "r" (fun)
> +                : "memory"
> +                );
> +
> +        return ret;
> +}
> +
> +static __always_inline long kvm_hypercall1(u64 fid, unsigned long arg0)
> +{
> +        register long ret asm("v0");
> +        register unsigned long fun asm("a0") = fid;
> +        register unsigned long a1 asm("a1") = arg0;
> +
> +        __asm__ __volatile__(
> +                "hvcl "__stringify(KVM_HCALL_PV_SERVICE)
> +                : "=r" (ret)
> +                : "r" (fun), "r" (a1)
> +                : "memory"
> +                );
> +
> +        return ret;
> +}
> +
> +static __always_inline long kvm_hypercall2(u64 fid,
> +                unsigned long arg0, unsigned long arg1)
> +{
> +        register long ret asm("v0");
> +        register unsigned long fun asm("a0") = fid;
> +        register unsigned long a1 asm("a1") = arg0;
> +        register unsigned long a2 asm("a2") = arg1;
> +
> +        __asm__ __volatile__(
> +                "hvcl "__stringify(KVM_HCALL_PV_SERVICE)
> +                : "=r" (ret)
> +                : "r" (fun), "r" (a1), "r" (a2)
> +                : "memory"
> +                );
> +
> +        return ret;
> +}
> +
> +static __always_inline long kvm_hypercall3(u64 fid,
> +                unsigned long arg0, unsigned long arg1, unsigned long arg2)
> +{
> +        register long ret asm("v0");
> +        register unsigned long fun asm("a0") = fid;
> +        register unsigned long a1 asm("a1") = arg0;
> +        register unsigned long a2 asm("a2") = arg1;
> +        register unsigned long a3 asm("a3") = arg2;
> +
> +        __asm__ __volatile__(
> +                "hvcl "__stringify(KVM_HCALL_PV_SERVICE)
> +                : "=r" (ret)
> +                : "r" (fun), "r" (a1), "r" (a2), "r" (a3)
> +                : "memory"
> +                );
> +
> +        return ret;
> +}
> +
> +static __always_inline long kvm_hypercall4(u64 fid,
> +                unsigned long arg0, unsigned long arg1, unsigned long arg2,
> +                unsigned long arg3)
> +{
> +        register long ret asm("v0");
> +        register unsigned long fun asm("a0") = fid;
> +        register unsigned long a1 asm("a1") = arg0;
> +        register unsigned long a2 asm("a2") = arg1;
> +        register unsigned long a3 asm("a3") = arg2;
> +        register unsigned long a4 asm("a4") = arg3;
> +
> +        __asm__ __volatile__(
> +                "hvcl "__stringify(KVM_HCALL_PV_SERVICE)
> +                : "=r" (ret)
> +                : "r" (fun), "r" (a1), "r" (a2), "r" (a3), "r" (a4)
> +                : "memory"
> +                );
> +
> +        return ret;
> +}
> +
> +static __always_inline long kvm_hypercall5(u64 fid,
> +                unsigned long arg0, unsigned long arg1, unsigned long arg2,
> +                unsigned long arg3, unsigned long arg4)
> +{
> +        register long ret asm("v0");
> +        register unsigned long fun asm("a0") = fid;
> +        register unsigned long a1 asm("a1") = arg0;
> +        register unsigned long a2 asm("a2") = arg1;
> +        register unsigned long a3 asm("a3") = arg2;
> +        register unsigned long a4 asm("a4") = arg3;
> +        register unsigned long a5 asm("a5") = arg4;
> +
> +        __asm__ __volatile__(
> +                "hvcl "__stringify(KVM_HCALL_PV_SERVICE)
> +                : "=r" (ret)
> +                : "r" (fun), "r" (a1), "r" (a2), "r" (a3), "r" (a4), "r" (a5)
> +                : "memory"
> +                );
> +
> +        return ret;
> +}
> +
> +
>  static inline unsigned int kvm_arch_para_features(void)
>  {
>          return 0;
>  }
> diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
> index a1d22e8b6f94..0ad36704cb4b 100644
> --- a/arch/loongarch/include/asm/loongarch.h
> +++ b/arch/loongarch/include/asm/loongarch.h
> @@ -167,6 +167,7 @@
>  #define CPUCFG_KVM_SIG       CPUCFG_KVM_BASE
>  #define KVM_SIGNATURE        "KVM\0"
>  #define CPUCFG_KVM_FEATURE   (CPUCFG_KVM_BASE + 4)
> +#define KVM_FEATURE_PV_IPI   BIT(1)
>
>  #ifndef __ASSEMBLY__
>
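A small aside on the hypercall ABI above, for anyone reading along: fid
goes in a0, which is also the return register ("v0" is just another name
for a0 in the LoongArch ABI), so a single-vcpu PV IPI later in this patch
boils down to something like:

        /* bitmap has only bit 0 set: IPI the vcpu whose physical cpuid is min */
        kvm_hypercall3(KVM_HCALL_FUNC_PV_IPI, 1UL, 0UL, min);

with min taken from cpu_logical_map().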
> diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c
> index ce36897d1e5a..4863e6c1b739 100644
> --- a/arch/loongarch/kernel/irq.c
> +++ b/arch/loongarch/kernel/irq.c
> @@ -113,5 +113,5 @@ void __init init_IRQ(void)
>                  per_cpu(irq_stack, i), per_cpu(irq_stack, i) + IRQ_STACK_SIZE);
>          }
>
> -        set_csr_ecfg(ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC);
> +        set_csr_ecfg(ECFGF_SIP0 | ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC);
>  }
> diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c
> index 5cf794e8490f..4c30e1c73c72 100644
> --- a/arch/loongarch/kernel/paravirt.c
> +++ b/arch/loongarch/kernel/paravirt.c
> @@ -1,6 +1,7 @@
>  // SPDX-License-Identifier: GPL-2.0
>  #include <linux/export.h>
>  #include <linux/types.h>
> +#include <linux/interrupt.h>
>  #include <linux/jump_label.h>
>  #include <linux/kvm_para.h>
>  #include <asm/paravirt.h>
> @@ -16,6 +17,103 @@ static u64 native_steal_clock(int cpu)
>
>  DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
>
> +#ifdef CONFIG_SMP
> +static void pv_send_ipi_single(int cpu, unsigned int action)
> +{
> +        unsigned int min, old;
> +        unsigned long bitmap = 0;
> +        irq_cpustat_t *info = &per_cpu(irq_stat, cpu);
> +
> +        action = BIT(action);
> +        old = atomic_fetch_or(action, &info->message);
> +        if (old == 0) {
> +                min = cpu_logical_map(cpu);
> +                bitmap = 1;
> +                kvm_hypercall3(KVM_HCALL_FUNC_PV_IPI, bitmap, 0, min);
> +        }

Early-return style would make this a little easier to read, i.e.:

        if (old)
                return;

        min = ......

> +}
> +
> +#define KVM_IPI_CLUSTER_SIZE  (2 * BITS_PER_LONG)
> +static void pv_send_ipi_mask(const struct cpumask *mask, unsigned int action)
> +{
> +        unsigned int cpu, i, min = 0, max = 0, old;
> +        __uint128_t bitmap = 0;
> +        irq_cpustat_t *info;
> +
> +        if (cpumask_empty(mask))
> +                return;
> +
> +        action = BIT(action);
> +        for_each_cpu(i, mask) {
> +                info = &per_cpu(irq_stat, i);
> +                old = atomic_fetch_or(action, &info->message);
> +                if (old)
> +                        continue;
> +
> +                cpu = cpu_logical_map(i);
> +                if (!bitmap) {
> +                        min = max = cpu;
> +                } else if (cpu > min && cpu < min + KVM_IPI_CLUSTER_SIZE) {
> +                        max = cpu > max ? cpu : max;
> +                } else if (cpu < min && (max - cpu) < KVM_IPI_CLUSTER_SIZE) {
> +                        bitmap <<= min - cpu;
> +                        min = cpu;
> +                } else {
> +                        /*
> +                         * Physical cpuids are sorted in ascending order,
> +                         * so this cpu belongs to the next mask window:
> +                         * send the IPI for the current bitmap here
> +                         * directly and restart with the remaining cpus.
> +                         */
> +                        kvm_hypercall3(KVM_HCALL_FUNC_PV_IPI,
> +                                (unsigned long)bitmap,
> +                                (unsigned long)(bitmap >> BITS_PER_LONG), min);
> +                        min = max = cpu;
> +                        bitmap = 0;
> +                }
> +                __set_bit(cpu - min, (unsigned long *)&bitmap);
> +        }
> +
> +        if (bitmap)
> +                kvm_hypercall3(KVM_HCALL_FUNC_PV_IPI, (unsigned long)bitmap,
> +                        (unsigned long)(bitmap >> BITS_PER_LONG), min);
> +}
> +
> +static irqreturn_t loongson_do_swi(int irq, void *dev)
> +{
> +        irq_cpustat_t *info;
> +        long action;
> +
> +        /* Clear swi interrupt */
> +        clear_csr_estat(1 << INT_SWI0);
> +        info = this_cpu_ptr(&irq_stat);
> +        action = atomic_xchg(&info->message, 0);
> +        if (action & SMP_CALL_FUNCTION) {
> +                generic_smp_call_function_interrupt();
> +                info->ipi_irqs[IPI_CALL_FUNCTION]++;
> +        }
> +
> +        if (action & SMP_RESCHEDULE) {
> +                scheduler_ipi();
> +                info->ipi_irqs[IPI_RESCHEDULE]++;
> +        }
> +
> +        return IRQ_HANDLED;
> +}
> +
> +static void pv_init_ipi(void)
> +{
> +        int r, swi0;
> +
> +        swi0 = get_percpu_irq(INT_SWI0);
> +        if (swi0 < 0)
> +                panic("SWI0 IRQ mapping failed\n");
> +        irq_set_percpu_devid(swi0);
> +        r = request_percpu_irq(swi0, loongson_do_swi, "SWI0", &irq_stat);
> +        if (r < 0)
> +                panic("SWI0 IRQ request failed\n");
> +}
> +#endif
> +
>  static bool kvm_para_available(void)
>  {
>          static int hypervisor_type;
> @@ -32,10 +130,24 @@ static bool kvm_para_available(void)
>
>  int __init pv_ipi_init(void)
>  {
> +        int feature;
> +
>          if (!cpu_has_hypervisor)
>                  return 0;
>          if (!kvm_para_available())
>                  return 0;
>
> +        /*
> +         * Check whether the KVM hypervisor supports PV IPI or not.
> +         */
> +        feature = read_cpucfg(CPUCFG_KVM_FEATURE);
> +#ifdef CONFIG_SMP
> +        if (feature & KVM_FEATURE_PV_IPI) {
> +                smp_ops.init_ipi = pv_init_ipi;
> +                smp_ops.send_ipi_single = pv_send_ipi_single;
> +                smp_ops.send_ipi_mask = pv_send_ipi_mask;
> +        }
> +#endif
> +
>          return 1;
>  }
> diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
> index b79a1244b56f..c95ed3224b7d 100644
> --- a/arch/loongarch/kernel/setup.c
> +++ b/arch/loongarch/kernel/setup.c
> @@ -368,6 +368,7 @@ void __init platform_init(void)
>          pr_info("The BIOS Version: %s\n", b_info.bios_version);
>
>          efi_runtime_init();
> +        pv_ipi_init();

Moving the callsite to loongson_smp_setup() would be better.
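Something like this untested sketch (assuming loongson_smp_setup() in
arch/loongarch/kernel/smp.c stays the single place where smp_ops is set
up, so the PV hooks are registered before any IPI is used):

        void __init loongson_smp_setup(void)
        {
                ...
                /* let PV IPI override init_ipi/send_ipi_* in smp_ops */
                pv_ipi_init();
                ...
        }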
Huacai

>  }
>
>  static void __init check_kernel_sections_mem(void)
> diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
> index 2182e7cc2ed6..9e9fda1fe18a 100644
> --- a/arch/loongarch/kernel/smp.c
> +++ b/arch/loongarch/kernel/smp.c
> @@ -285,7 +285,7 @@ void loongson_boot_secondary(int cpu, struct task_struct *idle)
>  void loongson_init_secondary(void)
>  {
>          unsigned int cpu = smp_processor_id();
> -        unsigned int imask = ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 |
> +        unsigned int imask = ECFGF_SIP0 | ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 |
>                               ECFGF_IPI | ECFGF_PMC | ECFGF_TIMER;
>
>          change_csr_ecfg(ECFG0_IM, imask);
> diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
> index 6a38fd59d86d..46940e97975b 100644
> --- a/arch/loongarch/kvm/exit.c
> +++ b/arch/loongarch/kvm/exit.c
> @@ -227,6 +227,9 @@ static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst)
>          case CPUCFG_KVM_SIG:
>                  vcpu->arch.gprs[rd] = *(unsigned int *)KVM_SIGNATURE;
>                  break;
> +        case CPUCFG_KVM_FEATURE:
> +                vcpu->arch.gprs[rd] = KVM_FEATURE_PV_IPI;
> +                break;
>          default:
>                  vcpu->arch.gprs[rd] = 0;
>                  break;
> @@ -699,12 +702,78 @@ static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu)
>          return RESUME_GUEST;
>  }
>
> +static int kvm_pv_send_ipi(struct kvm_vcpu *vcpu)
> +{
> +        unsigned long ipi_bitmap;
> +        unsigned int min, cpu, i;
> +        struct kvm_vcpu *dest;
> +
> +        min = vcpu->arch.gprs[LOONGARCH_GPR_A3];
> +        for (i = 0; i < 2; i++) {
> +                ipi_bitmap = vcpu->arch.gprs[LOONGARCH_GPR_A1 + i];
> +                if (!ipi_bitmap)
> +                        continue;
> +
> +                cpu = find_first_bit((void *)&ipi_bitmap, BITS_PER_LONG);
> +                while (cpu < BITS_PER_LONG) {
> +                        dest = kvm_get_vcpu_by_cpuid(vcpu->kvm, cpu + min);
> +                        cpu = find_next_bit((void *)&ipi_bitmap, BITS_PER_LONG,
> +                                        cpu + 1);
> +                        if (!dest)
> +                                continue;
> +
> +                        /*
> +                         * Send SWI0 to dest vcpu to emulate IPI interrupt
> +                         */
> +                        kvm_queue_irq(dest, INT_SWI0);
> +                        kvm_vcpu_kick(dest);
> +                }
> +        }
> +
> +        return 0;
> +}
> +
> +/*
> + * Hypercall emulation always returns to the guest; the caller should check retval.
> + */
> +static void kvm_handle_pv_service(struct kvm_vcpu *vcpu)
> +{
> +        unsigned long func = vcpu->arch.gprs[LOONGARCH_GPR_A0];
> +        long ret;
> +
> +        switch (func) {
> +        case KVM_HCALL_FUNC_PV_IPI:
> +                kvm_pv_send_ipi(vcpu);
> +                ret = KVM_HCALL_STATUS_SUCCESS;
> +                break;
> +        default:
> +                ret = KVM_HCALL_INVALID_CODE;
> +                break;
> +        }
> +
> +        vcpu->arch.gprs[LOONGARCH_GPR_A0] = ret;
> +}
> +
>  static int kvm_handle_hypercall(struct kvm_vcpu *vcpu)
>  {
> +        larch_inst inst;
> +        unsigned int code;
> +
> +        inst.word = vcpu->arch.badi;
> +        code = inst.reg0i15_format.immediate;
>          update_pc(&vcpu->arch);
>
> -        /* Treat it as noop intruction, only set return value */
> -        vcpu->arch.gprs[LOONGARCH_GPR_A0] = KVM_HCALL_INVALID_CODE;
> +        switch (code) {
> +        case KVM_HCALL_PV_SERVICE:
> +                vcpu->stat.hypercall_exits++;
> +                kvm_handle_pv_service(vcpu);
> +                break;
> +        default:
> +                /* Treat it as a no-op instruction, only set the return value */
> +                vcpu->arch.gprs[LOONGARCH_GPR_A0] = KVM_HCALL_INVALID_CODE;
> +                break;
> +        }
> +
>          return RESUME_GUEST;
>  }
>
> diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
> index 40296d8ef297..24fd5e4647f3 100644
> --- a/arch/loongarch/kvm/vcpu.c
> +++ b/arch/loongarch/kvm/vcpu.c
> @@ -19,6 +19,7 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
>          STATS_DESC_COUNTER(VCPU, idle_exits),
>          STATS_DESC_COUNTER(VCPU, cpucfg_exits),
>          STATS_DESC_COUNTER(VCPU, signal_exits),
> +        STATS_DESC_COUNTER(VCPU, hypercall_exits)
>  };
>
>  const struct kvm_stats_header kvm_vcpu_stats_header = {
> --
> 2.39.3
>
>
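One more aside for anyone following the thread: a quick userspace toy to
show how the two bitmap words in a1/a2 relate to the base cpuid in a3
(illustrative only, not kernel code; the cpuid values are made up):

        #include <stdio.h>

        int main(void)
        {
                /* Encode physical cpuids {3, 9, 70} the way pv_send_ipi_mask()
                 * does: min is the smallest cpuid, and each cpu sets bit
                 * (cpu - min) in a 128-bit bitmap split into two 64-bit words.
                 */
                unsigned int cpus[] = { 3, 9, 70 }, min = 3, i;
                unsigned long long lo = 0, hi = 0;

                for (i = 0; i < 3; i++) {
                        unsigned int off = cpus[i] - min;
                        if (off < 64)
                                lo |= 1ULL << off;
                        else
                                hi |= 1ULL << (off - 64);
                }
                /* the guest would now do:
                 * kvm_hypercall3(KVM_HCALL_FUNC_PV_IPI, lo, hi, min);
                 */
                printf("a1=%#llx a2=%#llx a3=%u\n", lo, hi, min);
                return 0;
        }

This prints a1=0x41 a2=0x8 a3=3, i.e. bits 0 and 6 in the low word for
cpuids 3 and 9, and bit 3 in the high word for cpuid 70.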