Re: [PATCH v5 6/6] LoongArch: Add pv ipi support on LoongArch system

maobibo <maobibo@xxxxxxxxxxx> · Mon, 26 Feb 2024 10:30:47 +0800

On 2024/2/24 下午5:19, Huacai Chen wrote:
Hi, Bibo,

On Thu, Feb 22, 2024 at 11:28 AM Bibo Mao <maobibo@xxxxxxxxxxx> wrote:

On LoongArch systems, the IPI hardware uses iocsr registers: there is one
iocsr register access when sending an IPI, and two iocsr accesses when
receiving one, in the IPI interrupt handler. In VM mode every iocsr access
causes the VM to trap into the hypervisor, so a single hardware IPI
notification costs three traps.

PV IPI is added for VMs: the sender uses a hypercall instruction, and the
hypervisor injects SWI into the destination vcpu. In the SWI interrupt
handler, only the estat CSR register is written to clear the irq, and estat
CSR access does not trap into the hypervisor. So with PV IPI supported,
there is one trap on the sender side and none on the receiver side, i.e.
only one trap per IPI notification.

This patch also adds IPI multicast support, using a method similar to
x86's. With IPI multicast support, an IPI notification can be sent to at
most 128 vcpus at one time, which greatly reduces the number of traps into
the hypervisor.

Signed-off-by: Bibo Mao <maobibo@xxxxxxxxxxx>
---
  arch/loongarch/include/asm/hardirq.h   |   1 +
  arch/loongarch/include/asm/kvm_host.h  |   1 +
  arch/loongarch/include/asm/kvm_para.h  | 123 +++++++++++++++++++++++++
  arch/loongarch/include/asm/loongarch.h |   1 +
  arch/loongarch/kernel/irq.c            |   2 +-
  arch/loongarch/kernel/paravirt.c       | 112 ++++++++++++++++++++++
  arch/loongarch/kernel/setup.c          |   1 +
  arch/loongarch/kernel/smp.c            |   2 +-
  arch/loongarch/kvm/exit.c              |  73 ++++++++++++++-
  arch/loongarch/kvm/vcpu.c              |   1 +
  10 files changed, 313 insertions(+), 4 deletions(-)

diff --git a/arch/loongarch/include/asm/hardirq.h b/arch/loongarch/include/asm/hardirq.h
index 9f0038e19c7f..b26d596a73aa 100644
--- a/arch/loongarch/include/asm/hardirq.h
+++ b/arch/loongarch/include/asm/hardirq.h
@@ -21,6 +21,7 @@ enum ipi_msg_type {
  typedef struct {
         unsigned int ipi_irqs[NR_IPI];
         unsigned int __softirq_pending;
+       atomic_t message ____cacheline_aligned_in_smp;
  } ____cacheline_aligned irq_cpustat_t;

  DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
index 3ba16ef1fe69..0b96c6303cf7 100644
--- a/arch/loongarch/include/asm/kvm_host.h
+++ b/arch/loongarch/include/asm/kvm_host.h
@@ -43,6 +43,7 @@ struct kvm_vcpu_stat {
         u64 idle_exits;
         u64 cpucfg_exits;
         u64 signal_exits;
+       u64 hypercall_exits;
  };

  #define KVM_MEM_HUGEPAGE_CAPABLE       (1UL << 0)
diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h
index af5d677a9052..a82bffbbf8a1 100644
--- a/arch/loongarch/include/asm/kvm_para.h
+++ b/arch/loongarch/include/asm/kvm_para.h
@@ -8,6 +8,9 @@
  #define HYPERVISOR_KVM                 1
  #define HYPERVISOR_VENDOR_SHIFT                8
  #define HYPERCALL_CODE(vendor, code)   ((vendor << HYPERVISOR_VENDOR_SHIFT) + code)
+#define KVM_HCALL_CODE_PV_SERVICE      0
+#define KVM_HCALL_PV_SERVICE           HYPERCALL_CODE(HYPERVISOR_KVM, KVM_HCALL_CODE_PV_SERVICE)
+#define  KVM_HCALL_FUNC_PV_IPI         1

  /*
   * LoongArch hypercall return code
@@ -16,6 +19,126 @@
  #define KVM_HCALL_INVALID_CODE         -1UL
  #define KVM_HCALL_INVALID_PARAMETER    -2UL

+/*
+ * Hypercall interface for KVM hypervisor
+ *
+ * a0: function identifier
+ * a1-a6: args
+ * Return value will be placed in v0.
+ * Up to 6 arguments are passed in a1, a2, a3, a4, a5, a6.
+ */
+static __always_inline long kvm_hypercall(u64 fid)
+{
+       register long ret asm("v0");
+       register unsigned long fun asm("a0") = fid;
+
+       __asm__ __volatile__(
+               "hvcl "__stringify(KVM_HCALL_PV_SERVICE)
+               : "=r" (ret)
+               : "r" (fun)
+               : "memory"
+               );
+
+       return ret;
+}
+
+static __always_inline long kvm_hypercall1(u64 fid, unsigned long arg0)
+{
+       register long ret asm("v0");
+       register unsigned long fun asm("a0") = fid;
+       register unsigned long a1  asm("a1") = arg0;
+
+       __asm__ __volatile__(
+               "hvcl "__stringify(KVM_HCALL_PV_SERVICE)
+               : "=r" (ret)
+               : "r" (fun), "r" (a1)
+               : "memory"
+               );
+
+       return ret;
+}
+
+static __always_inline long kvm_hypercall2(u64 fid,
+               unsigned long arg0, unsigned long arg1)
+{
+       register long ret asm("v0");
+       register unsigned long fun asm("a0") = fid;
+       register unsigned long a1  asm("a1") = arg0;
+       register unsigned long a2  asm("a2") = arg1;
+
+       __asm__ __volatile__(
+                       "hvcl "__stringify(KVM_HCALL_PV_SERVICE)
+                       : "=r" (ret)
+                       : "r" (fun), "r" (a1), "r" (a2)
+                       : "memory"
+                       );
+
+       return ret;
+}
+
+static __always_inline long kvm_hypercall3(u64 fid,
+       unsigned long arg0, unsigned long arg1, unsigned long arg2)
+{
+       register long ret asm("v0");
+       register unsigned long fun asm("a0") = fid;
+       register unsigned long a1  asm("a1") = arg0;
+       register unsigned long a2  asm("a2") = arg1;
+       register unsigned long a3  asm("a3") = arg2;
+
+       __asm__ __volatile__(
+               "hvcl "__stringify(KVM_HCALL_PV_SERVICE)
+               : "=r" (ret)
+               : "r" (fun), "r" (a1), "r" (a2), "r" (a3)
+               : "memory"
+               );
+
+       return ret;
+}
+
+static __always_inline long kvm_hypercall4(u64 fid,
+               unsigned long arg0, unsigned long arg1, unsigned long arg2,
+               unsigned long arg3)
+{
+       register long ret asm("v0");
+       register unsigned long fun asm("a0") = fid;
+       register unsigned long a1  asm("a1") = arg0;
+       register unsigned long a2  asm("a2") = arg1;
+       register unsigned long a3  asm("a3") = arg2;
+       register unsigned long a4  asm("a4") = arg3;
+
+       __asm__ __volatile__(
+               "hvcl "__stringify(KVM_HCALL_PV_SERVICE)
+               : "=r" (ret)
+               : "r"(fun), "r" (a1), "r" (a2), "r" (a3), "r" (a4)
+               : "memory"
+               );
+
+       return ret;
+}
+
+static __always_inline long kvm_hypercall5(u64 fid,
+               unsigned long arg0, unsigned long arg1, unsigned long arg2,
+               unsigned long arg3, unsigned long arg4)
+{
+       register long ret asm("v0");
+       register unsigned long fun asm("a0") = fid;
+       register unsigned long a1  asm("a1") = arg0;
+       register unsigned long a2  asm("a2") = arg1;
+       register unsigned long a3  asm("a3") = arg2;
+       register unsigned long a4  asm("a4") = arg3;
+       register unsigned long a5  asm("a5") = arg4;
+
+       __asm__ __volatile__(
+               "hvcl "__stringify(KVM_HCALL_PV_SERVICE)
+               : "=r" (ret)
+               : "r"(fun), "r" (a1), "r" (a2), "r" (a3), "r" (a4), "r" (a5)
+               : "memory"
+               );
+
+       return ret;
+}
+
+
  static inline unsigned int kvm_arch_para_features(void)
  {
         return 0;
diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
index a1d22e8b6f94..0ad36704cb4b 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -167,6 +167,7 @@
  #define CPUCFG_KVM_SIG                 CPUCFG_KVM_BASE
  #define  KVM_SIGNATURE                 "KVM\0"
  #define CPUCFG_KVM_FEATURE             (CPUCFG_KVM_BASE + 4)
+#define  KVM_FEATURE_PV_IPI            BIT(1)

  #ifndef __ASSEMBLY__

diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c
index ce36897d1e5a..4863e6c1b739 100644
--- a/arch/loongarch/kernel/irq.c
+++ b/arch/loongarch/kernel/irq.c
@@ -113,5 +113,5 @@ void __init init_IRQ(void)
                         per_cpu(irq_stack, i), per_cpu(irq_stack, i) + IRQ_STACK_SIZE);
         }

-       set_csr_ecfg(ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC);
+       set_csr_ecfg(ECFGF_SIP0 | ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC);
  }
diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c
index 5cf794e8490f..4c30e1c73c72 100644
--- a/arch/loongarch/kernel/paravirt.c
+++ b/arch/loongarch/kernel/paravirt.c
@@ -1,6 +1,7 @@
  // SPDX-License-Identifier: GPL-2.0
  #include <linux/export.h>
  #include <linux/types.h>
+#include <linux/interrupt.h>
  #include <linux/jump_label.h>
  #include <linux/kvm_para.h>
  #include <asm/paravirt.h>
@@ -16,6 +17,103 @@ static u64 native_steal_clock(int cpu)

  DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);

+#ifdef CONFIG_SMP
+static void pv_send_ipi_single(int cpu, unsigned int action)
+{
+       unsigned int min, old;
+       unsigned long bitmap = 0;
+       irq_cpustat_t *info = &per_cpu(irq_stat, cpu);
+
+       action = BIT(action);
+       old = atomic_fetch_or(action, &info->message);
+       if (old == 0) {
+               min = cpu_logical_map(cpu);
+               bitmap = 1;
+               kvm_hypercall3(KVM_HCALL_FUNC_PV_IPI, bitmap, 0, min);
+       }
An early-return style would make this a little simpler, i.e.:

if (old)
    return;

min = ......

Will do in the next patch.

+}
+
+#define KVM_IPI_CLUSTER_SIZE           (2 * BITS_PER_LONG)
+static void pv_send_ipi_mask(const struct cpumask *mask, unsigned int action)
+{
+       unsigned int cpu, i, min = 0, max = 0, old;
+       __uint128_t bitmap = 0;
+       irq_cpustat_t *info;
+
+       if (cpumask_empty(mask))
+               return;
+
+       action = BIT(action);
+       for_each_cpu(i, mask) {
+               info = &per_cpu(irq_stat, i);
+               old = atomic_fetch_or(action, &info->message);
+               if (old)
+                       continue;
+
+               cpu = cpu_logical_map(i);
+               if (!bitmap) {
+                       min = max = cpu;
+               } else if (cpu > min && cpu < min + KVM_IPI_CLUSTER_SIZE) {
+                       max = cpu > max ? cpu : max;
+               } else if (cpu < min && (max - cpu) < KVM_IPI_CLUSTER_SIZE) {
+                       bitmap <<= min - cpu;
+                       min = cpu;
+               } else {
+                       /*
+                        * Physical cpuids are sorted in ascending order, so
+                        * this cpu starts the next mask: send the IPI for the
+                        * current bitmap now and begin a new cluster here.
+                        */
+                       kvm_hypercall3(KVM_HCALL_FUNC_PV_IPI,
+                               (unsigned long)bitmap,
+                               (unsigned long)(bitmap >> BITS_PER_LONG), min);
+                       min = max = cpu;
+                       bitmap = 0;
+               }
+               __set_bit(cpu - min, (unsigned long *)&bitmap);
+       }
+
+       if (bitmap)
+               kvm_hypercall3(KVM_HCALL_FUNC_PV_IPI, (unsigned long)bitmap,
+                               (unsigned long)(bitmap >> BITS_PER_LONG), min);
+}
+
+static irqreturn_t loongson_do_swi(int irq, void *dev)
+{
+       irq_cpustat_t *info;
+       long action;
+
+       /* Clear swi interrupt */
+       clear_csr_estat(1 << INT_SWI0);
+       info = this_cpu_ptr(&irq_stat);
+       action = atomic_xchg(&info->message, 0);
+       if (action & SMP_CALL_FUNCTION) {
+               generic_smp_call_function_interrupt();
+               info->ipi_irqs[IPI_CALL_FUNCTION]++;
+       }
+
+       if (action & SMP_RESCHEDULE) {
+               scheduler_ipi();
+               info->ipi_irqs[IPI_RESCHEDULE]++;
+       }
+
+       return IRQ_HANDLED;
+}
+
+static void pv_init_ipi(void)
+{
+       int r, swi0;
+
+       swi0 = get_percpu_irq(INT_SWI0);
+       if (swi0 < 0)
+               panic("SWI0 IRQ mapping failed\n");
+       irq_set_percpu_devid(swi0);
+       r = request_percpu_irq(swi0, loongson_do_swi, "SWI0", &irq_stat);
+       if (r < 0)
+               panic("SWI0 IRQ request failed\n");
+}
+#endif
+
  static bool kvm_para_available(void)
  {
         static int hypervisor_type;
@@ -32,10 +130,24 @@ static bool kvm_para_available(void)

  int __init pv_ipi_init(void)
  {
+       int feature;
+
         if (!cpu_has_hypervisor)
                 return 0;
         if (!kvm_para_available())
                 return 0;

+       /*
+        * check whether KVM hypervisor supports pv_ipi or not
+        */
+       feature = read_cpucfg(CPUCFG_KVM_FEATURE);
+#ifdef CONFIG_SMP
+       if (feature & KVM_FEATURE_PV_IPI) {
+               smp_ops.init_ipi                = pv_init_ipi;
+               smp_ops.send_ipi_single         = pv_send_ipi_single;
+               smp_ops.send_ipi_mask           = pv_send_ipi_mask;
+       }
+#endif
+
         return 1;
  }
diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index b79a1244b56f..c95ed3224b7d 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -368,6 +368,7 @@ void __init platform_init(void)
         pr_info("The BIOS Version: %s\n", b_info.bios_version);

         efi_runtime_init();
+       pv_ipi_init();
Moving the call site to loongson_smp_setup() would be better.
Will do in the next patch.

Regards
Bibo Mao

Huacai

  }

  static void __init check_kernel_sections_mem(void)
diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
index 2182e7cc2ed6..9e9fda1fe18a 100644
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -285,7 +285,7 @@ void loongson_boot_secondary(int cpu, struct task_struct *idle)
  void loongson_init_secondary(void)
  {
         unsigned int cpu = smp_processor_id();
-       unsigned int imask = ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 |
+       unsigned int imask = ECFGF_SIP0 | ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 |
                              ECFGF_IPI | ECFGF_PMC | ECFGF_TIMER;

         change_csr_ecfg(ECFG0_IM, imask);
diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
index 6a38fd59d86d..46940e97975b 100644
--- a/arch/loongarch/kvm/exit.c
+++ b/arch/loongarch/kvm/exit.c
@@ -227,6 +227,9 @@ static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst)
         case CPUCFG_KVM_SIG:
                 vcpu->arch.gprs[rd] = *(unsigned int *)KVM_SIGNATURE;
                 break;
+       case CPUCFG_KVM_FEATURE:
+               vcpu->arch.gprs[rd] = KVM_FEATURE_PV_IPI;
+               break;
         default:
                 vcpu->arch.gprs[rd] = 0;
                 break;
@@ -699,12 +702,78 @@ static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu)
         return RESUME_GUEST;
  }

+static int kvm_pv_send_ipi(struct kvm_vcpu *vcpu)
+{
+       unsigned long ipi_bitmap;
+       unsigned int min, cpu, i;
+       struct kvm_vcpu *dest;
+
+       min = vcpu->arch.gprs[LOONGARCH_GPR_A3];
+       for (i = 0; i < 2; i++) {
+               ipi_bitmap = vcpu->arch.gprs[LOONGARCH_GPR_A1 + i];
+               if (!ipi_bitmap)
+                       continue;
+
+               cpu = find_first_bit((void *)&ipi_bitmap, BITS_PER_LONG);
+               while (cpu < BITS_PER_LONG) {
+                       dest = kvm_get_vcpu_by_cpuid(vcpu->kvm, cpu + min);
+                       cpu = find_next_bit((void *)&ipi_bitmap, BITS_PER_LONG,
+                                       cpu + 1);
+                       if (!dest)
+                               continue;
+
+                       /*
+                        * Send SWI0 to dest vcpu to emulate IPI interrupt
+                        */
+                       kvm_queue_irq(dest, INT_SWI0);
+                       kvm_vcpu_kick(dest);
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Hypercall emulation always returns to the guest; the caller should check retval.
+ */
+static void kvm_handle_pv_service(struct kvm_vcpu *vcpu)
+{
+       unsigned long func = vcpu->arch.gprs[LOONGARCH_GPR_A0];
+       long ret;
+
+       switch (func) {
+       case KVM_HCALL_FUNC_PV_IPI:
+               kvm_pv_send_ipi(vcpu);
+               ret = KVM_HCALL_STATUS_SUCCESS;
+               break;
+       default:
+               ret = KVM_HCALL_INVALID_CODE;
+               break;
+       };
+
+       vcpu->arch.gprs[LOONGARCH_GPR_A0] = ret;
+}
+
  static int kvm_handle_hypercall(struct kvm_vcpu *vcpu)
  {
+       larch_inst inst;
+       unsigned int code;
+
+       inst.word = vcpu->arch.badi;
+       code = inst.reg0i15_format.immediate;
         update_pc(&vcpu->arch);

-       /* Treat it as noop intruction, only set return value */
-       vcpu->arch.gprs[LOONGARCH_GPR_A0] = KVM_HCALL_INVALID_CODE;
+       switch (code) {
+       case KVM_HCALL_PV_SERVICE:
+               vcpu->stat.hypercall_exits++;
+               kvm_handle_pv_service(vcpu);
+               break;
+       default:
+               /* Treat it as noop instruction, only set return value */
+               vcpu->arch.gprs[LOONGARCH_GPR_A0] = KVM_HCALL_INVALID_CODE;
+               break;
+       }
+
         return RESUME_GUEST;
  }

diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
index 40296d8ef297..24fd5e4647f3 100644
--- a/arch/loongarch/kvm/vcpu.c
+++ b/arch/loongarch/kvm/vcpu.c
@@ -19,6 +19,7 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
         STATS_DESC_COUNTER(VCPU, idle_exits),
         STATS_DESC_COUNTER(VCPU, cpucfg_exits),
         STATS_DESC_COUNTER(VCPU, signal_exits),
+       STATS_DESC_COUNTER(VCPU, hypercall_exits)
  };

  const struct kvm_stats_header kvm_vcpu_stats_header = {
--
2.39.3