On Thu, Jan 23, 2020 at 12:34 AM Palmer Dabbelt <palmerdabbelt@xxxxxxxxxx> wrote:
>
> On Mon, 23 Dec 2019 03:35:55 PST (-0800), Anup Patel wrote:
> > This patch implements VCPU create, init and destroy functions
> > required by generic KVM module. We don't have much dynamic
> > resources in struct kvm_vcpu_arch so these functions are quite
> > simple for KVM RISC-V.
> >
> > Signed-off-by: Anup Patel <anup.patel@xxxxxxx>
> > Acked-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> > Reviewed-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> > Reviewed-by: Alexander Graf <graf@xxxxxxxxxx>
> > ---
> >  arch/riscv/include/asm/kvm_host.h | 68 +++++++++++++++++++++++++++++++
> >  arch/riscv/kvm/vcpu.c             | 68 +++++++++++++++++++++++++++++--
> >  2 files changed, 132 insertions(+), 4 deletions(-)
> >
> > diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h
> > index e0b633f616a4..297431660be6 100644
> > --- a/arch/riscv/include/asm/kvm_host.h
> > +++ b/arch/riscv/include/asm/kvm_host.h
> > @@ -53,7 +53,75 @@ struct kvm_arch {
> >  	phys_addr_t pgd_phys;
> >  };
> >
> > +struct kvm_cpu_context {
> > +	unsigned long zero;
> > +	unsigned long ra;
> > +	unsigned long sp;
> > +	unsigned long gp;
> > +	unsigned long tp;
> > +	unsigned long t0;
> > +	unsigned long t1;
> > +	unsigned long t2;
> > +	unsigned long s0;
> > +	unsigned long s1;
> > +	unsigned long a0;
> > +	unsigned long a1;
> > +	unsigned long a2;
> > +	unsigned long a3;
> > +	unsigned long a4;
> > +	unsigned long a5;
> > +	unsigned long a6;
> > +	unsigned long a7;
> > +	unsigned long s2;
> > +	unsigned long s3;
> > +	unsigned long s4;
> > +	unsigned long s5;
> > +	unsigned long s6;
> > +	unsigned long s7;
> > +	unsigned long s8;
> > +	unsigned long s9;
> > +	unsigned long s10;
> > +	unsigned long s11;
> > +	unsigned long t3;
> > +	unsigned long t4;
> > +	unsigned long t5;
> > +	unsigned long t6;
> > +	unsigned long sepc;
> > +	unsigned long sstatus;
> > +	unsigned long hstatus;
> > +};
>
> Looks like pretty much everyone else is putting 'struct kvm_regs' at the
> start of 'struct kvm_cpu_context', which I'm assuming avoids duplication
> when implementing KVM_{GET,SET}_REGS. That would probably be cleaner, but I
> think it'd be best to just have this match our mcontext.

The "struct kvm_cpu_context" is internal to KVM RISC-V, whereas the
"struct kvm_vcpu_arch" is what core KVM expects.

>
> > +
> > +struct kvm_vcpu_csr {
> > +	unsigned long vsstatus;
> > +	unsigned long hie;
> > +	unsigned long vstvec;
> > +	unsigned long vsscratch;
> > +	unsigned long vsepc;
> > +	unsigned long vscause;
> > +	unsigned long vstval;
> > +	unsigned long hip;
> > +	unsigned long vsatp;
> > +};
> > +
> >  struct kvm_vcpu_arch {
> > +	/* VCPU ran atleast once */
> > +	bool ran_atleast_once;
> > +
> > +	/* ISA feature bits (similar to MISA) */
> > +	unsigned long isa;
> > +
> > +	/* CPU context of Guest VCPU */
> > +	struct kvm_cpu_context guest_context;
> > +
> > +	/* CPU CSR context of Guest VCPU */
> > +	struct kvm_vcpu_csr guest_csr;
>
> It looks like other architectures either put the entire CPU state in 'struct
> kvm_cpu_context' (arm64, for example) or inline all the definitions (mips,
> for example). I'd lean the arm64 way here, but I haven't gotten sufficiently
> far into the user ABI to figure out if this would help in a meaningful
> fashion.

It's an implementation choice. We don't need to follow KVM ARM64. In fact,
having separate "struct kvm_cpu_context" and "struct kvm_vcpu_csr" is much
cleaner.
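For illustration only (this is not part of the patch, and the helper below
is hypothetical), a GET/SET style accessor can still be implemented directly
on top of the internal context, because the first 32 fields of
"struct kvm_cpu_context" are the GPRs laid out in x0..x31 order:

static unsigned long kvm_riscv_vcpu_read_gpr(struct kvm_vcpu *vcpu,
					     unsigned int num)
{
	/* All GPR fields are consecutive "unsigned long" members */
	unsigned long *gprs = (unsigned long *)&vcpu->arch.guest_context;

	/* num 0..31 selects zero..t6 per the field order above; out of range reads as 0 */
	return (num < 32) ? gprs[num] : 0;
}

So keeping "struct kvm_cpu_context" shaped like our mcontext does not force
any duplication on the user-space interface side.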
>
> > +	/* CPU context upon Guest VCPU reset */
> > +	struct kvm_cpu_context guest_reset_context;
> > +
> > +	/* CPU CSR context upon Guest VCPU reset */
> > +	struct kvm_vcpu_csr guest_reset_csr;
> > +
> >  	/* Don't run the VCPU (blocked) */
> >  	bool pause;
> >
> > diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
> > index bd7d6b154f61..cf8ca8d4a9ea 100644
> > --- a/arch/riscv/kvm/vcpu.c
> > +++ b/arch/riscv/kvm/vcpu.c
> > @@ -31,10 +31,48 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
> >  	{ NULL }
> >  };
> >
> > +#define KVM_RISCV_ISA_ALLOWED	(riscv_isa_extension_mask(a) | \
> > +				 riscv_isa_extension_mask(c) | \
> > +				 riscv_isa_extension_mask(d) | \
> > +				 riscv_isa_extension_mask(f) | \
> > +				 riscv_isa_extension_mask(i) | \
> > +				 riscv_isa_extension_mask(m) | \
> > +				 riscv_isa_extension_mask(s) | \
> > +				 riscv_isa_extension_mask(u))
> > +
> > +static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
> > +{
> > +	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
> > +	struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;
> > +	struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
> > +	struct kvm_cpu_context *reset_cntx = &vcpu->arch.guest_reset_context;
> > +
> > +	memcpy(csr, reset_csr, sizeof(*csr));
> > +
> > +	memcpy(cntx, reset_cntx, sizeof(*cntx));
> > +}
> > +
> >  struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
> >  {
> > -	/* TODO: */
> > -	return NULL;
> > +	int err;
> > +	struct kvm_vcpu *vcpu;
> > +
> > +	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
> > +	if (!vcpu) {
> > +		err = -ENOMEM;
> > +		goto out;
> > +	}
> > +
> > +	err = kvm_vcpu_init(vcpu, kvm, id);
> > +	if (err)
> > +		goto free_vcpu;
> > +
> > +	return vcpu;
> > +
> > +free_vcpu:
> > +	kmem_cache_free(kvm_vcpu_cache, vcpu);
> > +out:
> > +	return ERR_PTR(err);
> >  }
> >
> >  int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
> > @@ -48,13 +86,32 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
> >
> >  int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
> >  {
> > -	/* TODO: */
> > +	struct kvm_cpu_context *cntx;
> > +
> > +	/* Mark this VCPU never ran */
> > +	vcpu->arch.ran_atleast_once = false;
> > +
> > +	/* Setup ISA features available to VCPU */
> > +	vcpu->arch.isa = riscv_isa_extension_base(NULL) & KVM_RISCV_ISA_ALLOWED;
> > +
> > +	/* Setup reset state of shadow SSTATUS and HSTATUS CSRs */
> > +	cntx = &vcpu->arch.guest_reset_context;
> > +	cntx->sstatus = SR_SPP | SR_SPIE;
> > +	cntx->hstatus = 0;
> > +	cntx->hstatus |= HSTATUS_SP2V;
> > +	cntx->hstatus |= HSTATUS_SP2P;
> > +	cntx->hstatus |= HSTATUS_SPV;
> > +
> > +	/* Reset VCPU */
> > +	kvm_riscv_reset_vcpu(vcpu);
> > +
> >  	return 0;
> >  }
> >
> >  void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
> >  {
> > -	/* TODO: */
> > +	kvm_riscv_stage2_flush_cache(vcpu);
>
> There needs to be a comment as to why there's a flush here.
>
> The definition of kvm_riscv_stage2_flush_cache() should also have a comment
> describing what it actually does.

From the implementation it's pretty obvious, but I will add a single-line
comment anyway.
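For example, something along these lines (the comment wording is only
illustrative, and it assumes the helper simply releases the pages
pre-allocated for building Stage2 page tables):

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	/* Release the pages pre-allocated for Stage2 page table mappings */
	kvm_riscv_stage2_flush_cache(vcpu);

	kmem_cache_free(kvm_vcpu_cache, vcpu);
}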
>
> > +	kmem_cache_free(kvm_vcpu_cache, vcpu);
> >  }
> >
> >  int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
> > @@ -199,6 +256,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
> >  	int ret;
> >  	unsigned long scause, stval, htval, htinst;
> >
> > +	/* Mark this VCPU ran atleast once */
> > +	vcpu->arch.ran_atleast_once = true;
>
> I'm having some trouble figuring out how this doesn't have a race condition,
> but that's probably more applicable to the patch that uses it. Also, a bit of
> a nit pick: "at least" is two words. I don't care that much about the variable
> name, but the comments should use real words.

This variable is touched from a single thread only, hence there is no need
for protection.

I will fix the comment as you suggested.

>
> > +
> >  	vcpu->arch.srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
> >
> >  	/* Process MMIO value returned from user-space */
> > --
> > 2.17.1

Regards,
Anup