Re: [PATCH v8 4/6] LoongArch: KVM: Add vcpu search support from physical cpuid

Huacai Chen <chenhuacai@xxxxxxxxxx> · Mon, 6 May 2024 09:49:42 +0800

Hi, Bibo,

On Sun, Apr 28, 2024 at 6:05 PM Bibo Mao <maobibo@xxxxxxxxxxx> wrote:
>
> Physical cpuid is used for interrupt routing for irqchips such as
> ipi/msi/extioi interrupt controller. And physical cpuid is stored
> at CSR register LOONGARCH_CSR_CPUID, it can not be changed once vcpu
> is created and physical cpuid of two vcpus cannot be the same.
>
> Different irqchips declare different sizes for the physical cpuid: the
> max cpuid value for CSR LOONGARCH_CSR_CPUID on 3A5000 is 512, the max
> cpuid supported by IPI hardware is 1024, 256 for the extioi irqchip, and
> 65536 for the MSI irqchip.
>
> The smallest value from all interrupt controllers is selected now,
> and the max cpuid size is defined as 256 by KVM, which comes from
> the extioi irqchip.
>
> Signed-off-by: Bibo Mao <maobibo@xxxxxxxxxxx>
> ---
>  arch/loongarch/include/asm/kvm_host.h | 26 ++++++++
>  arch/loongarch/include/asm/kvm_vcpu.h |  1 +
>  arch/loongarch/kvm/vcpu.c             | 93 ++++++++++++++++++++++++++-
>  arch/loongarch/kvm/vm.c               | 11 ++++
>  4 files changed, 130 insertions(+), 1 deletion(-)
>
> diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
> index 2d62f7b0d377..3ba16ef1fe69 100644
> --- a/arch/loongarch/include/asm/kvm_host.h
> +++ b/arch/loongarch/include/asm/kvm_host.h
> @@ -64,6 +64,30 @@ struct kvm_world_switch {
>
>  #define MAX_PGTABLE_LEVELS     4
>
> +/*
> + * Physical cpu id is used for interrupt routing, there are different
> + * definitions about physical cpuid on different hardware.
> + *  For LOONGARCH_CSR_CPUID register, max cpuid size is 512
> + *  For IPI HW, max dest CPUID size is 1024
> + *  For extioi interrupt controller, max dest CPUID size is 256
> + *  For MSI interrupt controller, max supported CPUID size is 65536
> + *
> + * Currently max CPUID is defined as 256 for KVM hypervisor, in future
> + * it will be expanded to 4096, including 16 packages at most. And every
> + * package supports at most 256 vcpus
> + */
> +#define KVM_MAX_PHYID          256
> +
> +struct kvm_phyid_info {
> +       struct kvm_vcpu *vcpu;
> +       bool            enabled;
> +};
> +
> +struct kvm_phyid_map {
> +       int max_phyid;
> +       struct kvm_phyid_info phys_map[KVM_MAX_PHYID];
> +};
> +
>  struct kvm_arch {
>         /* Guest physical mm */
>         kvm_pte_t *pgd;
> @@ -71,6 +95,8 @@ struct kvm_arch {
>         unsigned long invalid_ptes[MAX_PGTABLE_LEVELS];
>         unsigned int  pte_shifts[MAX_PGTABLE_LEVELS];
>         unsigned int  root_level;
> +       spinlock_t    phyid_map_lock;
> +       struct kvm_phyid_map  *phyid_map;
>
>         s64 time_offset;
>         struct kvm_context __percpu *vmcs;
> diff --git a/arch/loongarch/include/asm/kvm_vcpu.h b/arch/loongarch/include/asm/kvm_vcpu.h
> index 0cb4fdb8a9b5..9f53950959da 100644
> --- a/arch/loongarch/include/asm/kvm_vcpu.h
> +++ b/arch/loongarch/include/asm/kvm_vcpu.h
> @@ -81,6 +81,7 @@ void kvm_save_timer(struct kvm_vcpu *vcpu);
>  void kvm_restore_timer(struct kvm_vcpu *vcpu);
>
>  int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
> +struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid);
>
>  /*
>   * Loongarch KVM guest interrupt handling
> diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
> index 3a8779065f73..b633fd28b8db 100644
> --- a/arch/loongarch/kvm/vcpu.c
> +++ b/arch/loongarch/kvm/vcpu.c
> @@ -274,6 +274,95 @@ static int _kvm_getcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 *val)
>         return 0;
>  }
>
> +static inline int kvm_set_cpuid(struct kvm_vcpu *vcpu, u64 val)
> +{
> +       int cpuid;
> +       struct loongarch_csrs *csr = vcpu->arch.csr;
> +       struct kvm_phyid_map  *map;
> +
> +       if (val >= KVM_MAX_PHYID)
> +               return -EINVAL;
> +
> +       cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT);
> +       map = vcpu->kvm->arch.phyid_map;
> +       spin_lock(&vcpu->kvm->arch.phyid_map_lock);
> +       if (map->phys_map[cpuid].enabled) {
> +               /*
> +                * Cpuid is already set before
> +                * Forbid changing different cpuid at runtime
> +                */
> +               if (cpuid != val) {
> +                       /*
> +                        * Cpuid 0 is initial value for vcpu, maybe invalid
> +                        * unset value for vcpu
> +                        */
> +                       if (cpuid) {
> +                               spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
> +                               return -EINVAL;
> +                       }
> +               } else {
> +                        /* Discard duplicated cpuid set */
> +                       spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
> +                       return 0;
> +               }
> +       }
I changed the logic and comments here when applying the patch; please
double-check that the result is correct.

> +
> +       if (map->phys_map[val].enabled) {
> +               /*
> +                * New cpuid is already set with other vcpu
> +                * Forbid sharing the same cpuid between different vcpus
> +                */
> +               if (map->phys_map[val].vcpu != vcpu) {
> +                       spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
> +                       return -EINVAL;
> +               }
> +
> +               /* Discard duplicated cpuid set operation*/
> +               spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
> +               return 0;
> +       }
> +
> +       kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, val);
> +       map->phys_map[val].enabled      = true;
> +       map->phys_map[val].vcpu         = vcpu;
> +       if (map->max_phyid < val)
> +               map->max_phyid = val;
> +       spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
> +       return 0;
> +}
> +
> +struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid)
> +{
> +       struct kvm_phyid_map  *map;
> +
> +       if (cpuid >= KVM_MAX_PHYID)
> +               return NULL;
> +
> +       map = kvm->arch.phyid_map;
> +       if (map->phys_map[cpuid].enabled)
> +               return map->phys_map[cpuid].vcpu;
> +
> +       return NULL;
> +}
> +
> +static inline void kvm_drop_cpuid(struct kvm_vcpu *vcpu)
> +{
> +       int cpuid;
> +       struct loongarch_csrs *csr = vcpu->arch.csr;
> +       struct kvm_phyid_map  *map;
> +
> +       map = vcpu->kvm->arch.phyid_map;
> +       cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT);
> +       if (cpuid >= KVM_MAX_PHYID)
> +               return;
> +
> +       if (map->phys_map[cpuid].enabled) {
> +               map->phys_map[cpuid].vcpu = NULL;
> +               map->phys_map[cpuid].enabled = false;
> +               kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, 0);
> +       }
> +}
kvm_set_cpuid() updates phyid_map under phyid_map_lock, but
kvm_drop_cpuid() and kvm_get_vcpu_by_cpuid() access the same map without
taking it. Do they also need the spinlock?

> +
>  static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val)
>  {
>         int ret = 0, gintc;
> @@ -291,7 +380,8 @@ static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val)
>                 kvm_set_sw_gcsr(csr, LOONGARCH_CSR_ESTAT, gintc);
>
>                 return ret;
> -       }
> +       } else if (id == LOONGARCH_CSR_CPUID)
> +               return kvm_set_cpuid(vcpu, val);
>
>         kvm_write_sw_gcsr(csr, id, val);
>
> @@ -943,6 +1033,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
>         hrtimer_cancel(&vcpu->arch.swtimer);
>         kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
>         kfree(vcpu->arch.csr);
> +       kvm_drop_cpuid(vcpu);
I think this line should go before the kfree() above; otherwise
kvm_drop_cpuid() reads and writes vcpu->arch.csr after it has been
freed, which is a use-after-free.

Huacai

>
>         /*
>          * If the vCPU is freed and reused as another vCPU, we don't want the
> diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c
> index 0a37f6fa8f2d..6006a28653ad 100644
> --- a/arch/loongarch/kvm/vm.c
> +++ b/arch/loongarch/kvm/vm.c
> @@ -30,6 +30,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>         if (!kvm->arch.pgd)
>                 return -ENOMEM;
>
> +       kvm->arch.phyid_map = kvzalloc(sizeof(struct kvm_phyid_map),
> +                               GFP_KERNEL_ACCOUNT);
> +       if (!kvm->arch.phyid_map) {
> +               free_page((unsigned long)kvm->arch.pgd);
> +               kvm->arch.pgd = NULL;
> +               return -ENOMEM;
> +       }
> +
>         kvm_init_vmcs(kvm);
>         kvm->arch.gpa_size = BIT(cpu_vabits - 1);
>         kvm->arch.root_level = CONFIG_PGTABLE_LEVELS - 1;
> @@ -44,6 +52,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>         for (i = 0; i <= kvm->arch.root_level; i++)
>                 kvm->arch.pte_shifts[i] = PAGE_SHIFT + i * (PAGE_SHIFT - 3);
>
> +       spin_lock_init(&kvm->arch.phyid_map_lock);
>         return 0;
>  }
>
> @@ -51,7 +60,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
>  {
>         kvm_destroy_vcpus(kvm);
>         free_page((unsigned long)kvm->arch.pgd);
> +       kvfree(kvm->arch.phyid_map);
>         kvm->arch.pgd = NULL;
> +       kvm->arch.phyid_map = NULL;
>  }
>
>  int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
> --
> 2.39.3
>