On Tue, Sep 19, 2023 at 10:38 AM zhaotianrui <zhaotianrui@xxxxxxxxxxx> wrote: > > > 在 2023/9/17 下午12:21, Huacai Chen 写道: > > Hi, Tianrui, > > > > On Fri, Sep 15, 2023 at 9:50 AM Tianrui Zhao <zhaotianrui@xxxxxxxxxxx> wrote: > >> Implement LoongArch kvm module init, module exit interface, > >> using kvm context to save the vpid info and vcpu world switch > >> interface pointer. > >> > >> Reviewed-by: Bibo Mao <maobibo@xxxxxxxxxxx> > >> Signed-off-by: Tianrui Zhao <zhaotianrui@xxxxxxxxxxx> > >> --- > >> arch/loongarch/kvm/main.c | 367 ++++++++++++++++++++++++++++++++++++++ > >> 1 file changed, 367 insertions(+) > >> create mode 100644 arch/loongarch/kvm/main.c > >> > >> diff --git a/arch/loongarch/kvm/main.c b/arch/loongarch/kvm/main.c > >> new file mode 100644 > >> index 0000000000..0deb9273d8 > >> --- /dev/null > >> +++ b/arch/loongarch/kvm/main.c > >> @@ -0,0 +1,367 @@ > >> +// SPDX-License-Identifier: GPL-2.0 > >> +/* > >> + * Copyright (C) 2020-2023 Loongson Technology Corporation Limited > >> + */ > >> + > >> +#include <linux/err.h> > >> +#include <linux/module.h> > >> +#include <linux/kvm_host.h> > >> +#include <asm/cacheflush.h> > >> +#include <asm/cpufeature.h> > >> +#include <asm/kvm_csr.h> > >> +#include "trace.h" > >> + > >> +static struct kvm_context __percpu *vmcs; > >> +struct kvm_world_switch *kvm_loongarch_ops; > >> +unsigned long vpid_mask; > >> +static int gcsr_flag[CSR_MAX_NUMS]; > >> + > >> +int get_gcsr_flag(int csr) > >> +{ > >> + if (csr < CSR_MAX_NUMS) > >> + return gcsr_flag[csr]; > >> + > >> + return INVALID_GCSR; > >> +} > >> + > >> +static inline void set_gcsr_sw_flag(int csr) > >> +{ > >> + if (csr < CSR_MAX_NUMS) > >> + gcsr_flag[csr] |= SW_GCSR; > >> +} > >> + > >> +static inline void set_gcsr_hw_flag(int csr) > >> +{ > >> + if (csr < CSR_MAX_NUMS) > >> + gcsr_flag[csr] |= HW_GCSR; > >> +} > >> + > >> +/* > >> + * The default value of gcsr_flag[CSR] is 0, and we use this > >> + * function to set the flag to 1(SW_GCSR) or 2(HW_GCSR) if 
the > >> + * gcsr is software or hardware. It will be used by get/set_gcsr, > >> + * if gcsr_flag is HW we should use gcsrrd/gcsrwr to access it, > >> + * else use sw csr to emulate it. > >> + */ > >> +static void kvm_init_gcsr_flag(void) > >> +{ > >> + set_gcsr_hw_flag(LOONGARCH_CSR_CRMD); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_PRMD); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_EUEN); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_MISC); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_ECFG); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_ESTAT); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_ERA); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_BADV); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_BADI); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_EENTRY); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_TLBIDX); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_TLBEHI); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_TLBELO0); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_TLBELO1); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_ASID); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_PGDL); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_PGDH); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_PWCTL0); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_PWCTL1); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_STLBPGSIZE); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_RVACFG); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_CPUID); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_PRCFG1); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_PRCFG2); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_PRCFG3); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_KS0); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_KS1); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_KS2); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_KS3); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_KS4); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_KS5); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_KS6); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_KS7); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_TMID); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_TCFG); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_TVAL); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_CNTC); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_LLBCTL); > >> + 
set_gcsr_hw_flag(LOONGARCH_CSR_TLBRENTRY); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_TLBRBADV); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_TLBRERA); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_TLBRSAVE); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_TLBRELO0); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_TLBRELO1); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_TLBREHI); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_TLBRPRMD); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_DMWIN0); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_DMWIN1); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_DMWIN2); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_DMWIN3); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_MWPS); > >> + set_gcsr_hw_flag(LOONGARCH_CSR_FWPS); > >> + > >> + set_gcsr_sw_flag(LOONGARCH_CSR_IMPCTL1); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_IMPCTL2); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_MERRCTL); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_MERRINFO1); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_MERRINFO2); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_MERRENTRY); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_MERRERA); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_MERRSAVE); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_CTAG); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_DEBUG); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_DERA); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_DESAVE); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_PRCFG1); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_PRCFG2); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_PRCFG3); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_PGD); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_TINTCLR); > >> + > >> + set_gcsr_sw_flag(LOONGARCH_CSR_FWPS); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_FWPC); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_MWPS); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_MWPC); > >> + > >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB0ADDR); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB0MASK); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB0CTRL); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB0ASID); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB1ADDR); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB1MASK); > >> + 
set_gcsr_sw_flag(LOONGARCH_CSR_DB1CTRL); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB1ASID); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB2ADDR); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB2MASK); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB2CTRL); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB2ASID); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB3ADDR); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB3MASK); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB3CTRL); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB3ASID); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB4ADDR); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB4MASK); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB4CTRL); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB4ASID); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB5ADDR); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB5MASK); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB5CTRL); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB5ASID); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB6ADDR); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB6MASK); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB6CTRL); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB6ASID); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB7ADDR); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB7MASK); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB7CTRL); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB7ASID); > >> + > >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB0ADDR); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB0MASK); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB0CTRL); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB0ASID); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB1ADDR); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB1MASK); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB1CTRL); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB1ASID); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB2ADDR); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB2MASK); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB2CTRL); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB2ASID); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB3ADDR); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB3MASK); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB3CTRL); > >> + 
set_gcsr_sw_flag(LOONGARCH_CSR_IB3ASID); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB4ADDR); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB4MASK); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB4CTRL); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB4ASID); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB5ADDR); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB5MASK); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB5CTRL); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB5ASID); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB6ADDR); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB6MASK); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB6CTRL); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB6ASID); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB7ADDR); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB7MASK); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB7CTRL); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB7ASID); > >> + > >> + set_gcsr_sw_flag(LOONGARCH_CSR_PERFCTRL0); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_PERFCNTR0); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_PERFCTRL1); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_PERFCNTR1); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_PERFCTRL2); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_PERFCNTR2); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_PERFCTRL3); > >> + set_gcsr_sw_flag(LOONGARCH_CSR_PERFCNTR3); > >> +} > >> + > >> +static void kvm_update_vpid(struct kvm_vcpu *vcpu, int cpu) > >> +{ > >> + struct kvm_context *context; > >> + unsigned long vpid; > >> + > >> + context = per_cpu_ptr(vcpu->kvm->arch.vmcs, cpu); > >> + vpid = context->vpid_cache + 1; > >> + if (!(vpid & vpid_mask)) { > >> + /* finish round of 64 bit loop */ > >> + if (unlikely(!vpid)) > >> + vpid = vpid_mask + 1; > >> + > >> + /* vpid 0 reserved for root */ > >> + ++vpid; > >> + > >> + /* start new vpid cycle */ > >> + kvm_flush_tlb_all(); > >> + } > >> + > >> + context->vpid_cache = vpid; > >> + vcpu->arch.vpid = vpid; > >> +} > >> + > >> +void kvm_check_vpid(struct kvm_vcpu *vcpu) > >> +{ > >> + struct kvm_context *context; > >> + bool migrated; > >> + unsigned long ver, old, vpid; > >> + int cpu; > 
>> + > >> + cpu = smp_processor_id(); > >> + /* > >> + * Are we entering guest context on a different CPU to last time? > >> + * If so, the vCPU's guest TLB state on this CPU may be stale. > >> + */ > >> + context = per_cpu_ptr(vcpu->kvm->arch.vmcs, cpu); > >> + migrated = (vcpu->cpu != cpu); > >> + > >> + /* > >> + * Check if our vpid is of an older version > >> + * > >> + * We also discard the stored vpid if we've executed on > >> + * another CPU, as the guest mappings may have changed without > >> + * hypervisor knowledge. > >> + */ > >> + ver = vcpu->arch.vpid & ~vpid_mask; > >> + old = context->vpid_cache & ~vpid_mask; > >> + if (migrated || (ver != old)) { > >> + kvm_update_vpid(vcpu, cpu); > >> + trace_kvm_vpid_change(vcpu, vcpu->arch.vpid); > >> + vcpu->cpu = cpu; > >> + } > >> + > >> + /* Restore GSTAT(0x50).vpid */ > >> + vpid = (vcpu->arch.vpid & vpid_mask) << CSR_GSTAT_GID_SHIFT; > >> + change_csr_gstat(vpid_mask << CSR_GSTAT_GID_SHIFT, vpid); > >> +} > >> + > >> +static int kvm_loongarch_env_init(void) > >> +{ > >> + struct kvm_context *context; > >> + int cpu, order; > >> + void *addr; > >> + > >> + vmcs = alloc_percpu(struct kvm_context); > >> + if (!vmcs) { > >> + pr_err("kvm: failed to allocate percpu kvm_context\n"); > >> + return -ENOMEM; > >> + } > >> + > >> + kvm_loongarch_ops = kzalloc(sizeof(*kvm_loongarch_ops), GFP_KERNEL); > >> + if (!kvm_loongarch_ops) { > >> + free_percpu(vmcs); > >> + vmcs = NULL; > >> + return -ENOMEM; > >> + } > >> + /* > >> + * There will be problem in world switch code if there > >> + * is page fault reenter, since pgd register is shared > >> + * between root kernel and kvm hypervisor. World switch > >> + * entry need be unmapped area, cannot be tlb mapped area. > >> + * In future if hw pagetable walking is supported, or there > >> + * is separate pgd registers between root kernel and kvm > >> + * hypervisor, copying about world switch code will not be used. 
> >> + */ > >> + > >> + order = get_order(kvm_vector_size + kvm_enter_guest_size); > >> + addr = (void *)__get_free_pages(GFP_KERNEL, order); > >> + if (!addr) { > >> + free_percpu(vmcs); > >> + vmcs = NULL; > >> + kfree(kvm_loongarch_ops); > >> + kvm_loongarch_ops = NULL; > >> + return -ENOMEM; > >> + } > >> + > >> + memcpy(addr, kvm_vector_entry, kvm_vector_size); > >> + memcpy(addr + kvm_vector_size, kvm_enter_guest, kvm_enter_guest_size); > > Why memcpy? In our internal repo, we use kvm_vector_entry and > > kvm_enter_guest directly. The long comments above make me nervous > > because Loongson-3A6000 already supports hardware pagetable walker. > > > > Huacai > As mentioned in the comments, this memcpy is not needed if hardware page > walk is supported in 3A6000. But why does KVM still work on Loongson-3A5000 in our internal repo, where we don't use memcpy()? Huacai > > Thanks > Tianrui Zhao > > > >> + flush_icache_range((unsigned long)addr, (unsigned long)addr + > >> + kvm_vector_size + kvm_enter_guest_size); > >> + kvm_loongarch_ops->guest_eentry = addr; > >> + kvm_loongarch_ops->enter_guest = addr + kvm_vector_size; > >> + kvm_loongarch_ops->page_order = order; > >> + > >> + vpid_mask = read_csr_gstat(); > >> + vpid_mask = (vpid_mask & CSR_GSTAT_GIDBIT) >> CSR_GSTAT_GIDBIT_SHIFT; > >> + if (vpid_mask) > >> + vpid_mask = GENMASK(vpid_mask - 1, 0); > >> + > >> + for_each_possible_cpu(cpu) { > >> + context = per_cpu_ptr(vmcs, cpu); > >> + context->vpid_cache = vpid_mask + 1; > >> + context->last_vcpu = NULL; > >> + } > >> + > >> + kvm_init_fault(); > >> + kvm_init_gcsr_flag(); > >> + > >> + return 0; > >> +} > >> + > >> +static void kvm_loongarch_env_exit(void) > >> +{ > >> + unsigned long addr; > >> + > >> + if (vmcs) > >> + free_percpu(vmcs); > >> + > >> + if (kvm_loongarch_ops) { > >> + if (kvm_loongarch_ops->guest_eentry) { > >> + addr = (unsigned long)kvm_loongarch_ops->guest_eentry; > >> + free_pages(addr, kvm_loongarch_ops->page_order); > >> + } > >> + 
kfree(kvm_loongarch_ops); > >> + } > >> +} > >> + > >> +static int kvm_loongarch_init(void) > >> +{ > >> + int r; > >> + > >> + if (!cpu_has_lvz) { > >> + kvm_info("hardware virtualization not available\n"); > >> + return -ENODEV; > >> + } > >> + r = kvm_loongarch_env_init(); > >> + if (r) > >> + return r; > >> + > >> + return kvm_init(sizeof(struct kvm_vcpu), 0, THIS_MODULE); > >> +} > >> + > >> +static void kvm_loongarch_exit(void) > >> +{ > >> + kvm_exit(); > >> + kvm_loongarch_env_exit(); > >> +} > >> + > >> +module_init(kvm_loongarch_init); > >> +module_exit(kvm_loongarch_exit); > >> + > >> +#ifdef MODULE > >> +static const struct cpu_feature loongarch_kvm_feature[] = { > >> + { .feature = cpu_feature(LOONGARCH_LVZ) }, > >> + {}, > >> +}; > >> +MODULE_DEVICE_TABLE(cpu, loongarch_kvm_feature); > >> +#endif > >> -- > >> 2.39.1 > >> > >