On Mon, Aug 5, 2019 at 7:13 PM Anup Patel <Anup.Patel@xxxxxxx> wrote:
>
> This patch implements VCPU interrupts and requests, both of which are
> asynchronous events.
>
> VCPU interrupts can be set/unset using the KVM_INTERRUPT ioctl from
> user-space. In the future, the in-kernel IRQCHIP emulation will use the
> kvm_riscv_vcpu_set_interrupt() and kvm_riscv_vcpu_unset_interrupt()
> functions to set/unset VCPU interrupts.
>
> The important VCPU requests implemented by this patch are:
> KVM_REQ_SLEEP      - set whenever the VCPU itself goes to sleep
> KVM_REQ_VCPU_RESET - set whenever a VCPU reset is requested
>
> The WFI trap-n-emulate (added later) will use the KVM_REQ_SLEEP request
> and the kvm_riscv_vcpu_has_interrupt() function.
>
> The KVM_REQ_VCPU_RESET request will be used by the SBI emulation (added
> later) to power up a VCPU that is in the power-off state. User-space can
> use the GET_MPSTATE/SET_MPSTATE ioctls to get/set the power state of a
> VCPU.
>
> Signed-off-by: Anup Patel <anup.patel@xxxxxxx>
> ---
>  arch/riscv/include/asm/kvm_host.h |  23 ++++
>  arch/riscv/include/uapi/asm/kvm.h |   3 +
>  arch/riscv/kvm/main.c             |   2 +
>  arch/riscv/kvm/vcpu.c             | 169 +++++++++++++++++++++++++++---
>  4 files changed, 184 insertions(+), 13 deletions(-)
>
> diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h
> index dab32c9c3470..04804f14f760 100644
> --- a/arch/riscv/include/asm/kvm_host.h
> +++ b/arch/riscv/include/asm/kvm_host.h
> @@ -122,6 +122,21 @@ struct kvm_vcpu_arch {
>         /* CPU CSR context upon Guest VCPU reset */
>         struct kvm_vcpu_csr guest_reset_csr;
>
> +       /*
> +        * VCPU interrupts
> +        *
> +        * We have a lockless approach for tracking pending VCPU interrupts
> +        * implemented using atomic bitops. The irqs_pending bitmap represents
> +        * pending interrupts whereas irqs_pending_mask represents the bits
> +        * changed in irqs_pending. Our approach is modeled around a
> +        * multiple-producer, single-consumer problem where the consumer is
> +        * the VCPU itself.
> +        */
> +       unsigned long irqs_pending;
> +       unsigned long irqs_pending_mask;
> +
> +       /* VCPU power-off state */
> +       bool power_off;
> +
>         /* Don't run the VCPU (blocked) */
>         bool pause;
>
> @@ -146,4 +161,12 @@ int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
>
>  static inline void __kvm_riscv_switch_to(struct kvm_vcpu_arch *vcpu_arch) {}
>
> +int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
> +int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
> +void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu);
> +void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu);
> +bool kvm_riscv_vcpu_has_interrupt(struct kvm_vcpu *vcpu);
> +void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu);
> +void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu);
> +
>  #endif /* __RISCV_KVM_HOST_H__ */
> diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h
> index d15875818b6e..6dbc056d58ba 100644
> --- a/arch/riscv/include/uapi/asm/kvm.h
> +++ b/arch/riscv/include/uapi/asm/kvm.h
> @@ -18,6 +18,9 @@
>
>  #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
>
> +#define KVM_INTERRUPT_SET   -1U
> +#define KVM_INTERRUPT_UNSET -2U
> +
>  /* for KVM_GET_REGS and KVM_SET_REGS */
>  struct kvm_regs {
>  };
> diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c
> index a26a68df7cfc..f4a7a3c67f8e 100644
> --- a/arch/riscv/kvm/main.c
> +++ b/arch/riscv/kvm/main.c
> @@ -48,6 +48,8 @@ int kvm_arch_hardware_enable(void)
>         hideleg |= SIE_SEIE;
>         csr_write(CSR_HIDELEG, hideleg);
>
> +       csr_write(CSR_VSIP, 0);
> +
>         return 0;
>  }
>
> diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
> index ff08d138f7c3..455b0f40832b 100644
> --- a/arch/riscv/kvm/vcpu.c
> +++ b/arch/riscv/kvm/vcpu.c
> @@ -40,6 +40,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
>                         RISCV_ISA_EXT_s | \
>                         RISCV_ISA_EXT_u)
>
> +static DEFINE_PER_CPU(unsigned long, vsip_shadow);
> +

With the introduction of a compile-time percpu variable here, insmod
fails to load KVM RISC-V as a loadable module. This looks like an issue
in arch/riscv/kernel/module.c. I tried a run-time percpu variable using
the alloc_percpu() API instead, and it works perfectly fine. I will make
vsip_shadow a run-time percpu variable instead of a compile-time one in
the v4 series.
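A minimal sketch of the run-time percpu variant (illustrative only, not
the actual v4 code; the init/free helper names are assumptions):

static unsigned long __percpu *vsip_shadow;

static int kvm_riscv_vsip_shadow_init(void)
{
	/* Run-time allocation side-steps the module relocation issue. */
	vsip_shadow = alloc_percpu(unsigned long);
	if (!vsip_shadow)
		return -ENOMEM;

	return 0;
}

static void kvm_riscv_vsip_shadow_free(void)
{
	free_percpu(vsip_shadow);
}

/*
 * this_cpu_ptr() then takes the __percpu pointer itself rather than
 * &vsip_shadow, e.g. in kvm_riscv_update_vsip():
 *
 *	unsigned long *vsip = this_cpu_ptr(vsip_shadow);
 */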
>  static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
>  {
>         struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
> @@ -50,6 +52,9 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
>         memcpy(csr, reset_csr, sizeof(*csr));
>
>         memcpy(cntx, reset_cntx, sizeof(*cntx));
> +
> +       WRITE_ONCE(vcpu->arch.irqs_pending, 0);
> +       WRITE_ONCE(vcpu->arch.irqs_pending_mask, 0);
>  }
>
>  struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
> @@ -116,8 +121,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
>
>  int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
>  {
> -       /* TODO: */
> -       return 0;
> +       return READ_ONCE(vcpu->arch.irqs_pending) & (1UL << IRQ_S_TIMER);
>  }
>
>  void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
> @@ -130,20 +134,18 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
>
>  int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
>  {
> -       /* TODO: */
> -       return 0;
> +       return (kvm_riscv_vcpu_has_interrupt(vcpu) &&
> +               !vcpu->arch.power_off && !vcpu->arch.pause);
>  }
>
>  int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
>  {
> -       /* TODO: */
> -       return 0;
> +       return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
>  }
>
>  bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
>  {
> -       /* TODO: */
> -       return false;
> +       return (vcpu->arch.guest_context.sstatus & SR_SPP) ? true : false;
>  }
>
>  bool kvm_arch_has_vcpu_debugfs(void)
> @@ -164,7 +166,21 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
>  long kvm_arch_vcpu_async_ioctl(struct file *filp,
>                                unsigned int ioctl, unsigned long arg)
>  {
> -       /* TODO; */
> +       struct kvm_vcpu *vcpu = filp->private_data;
> +       void __user *argp = (void __user *)arg;
> +
> +       if (ioctl == KVM_INTERRUPT) {
> +               struct kvm_interrupt irq;
> +
> +               if (copy_from_user(&irq, argp, sizeof(irq)))
> +                       return -EFAULT;
> +
> +               if (irq.irq == KVM_INTERRUPT_SET)
> +                       return kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_S_EXT);
> +               else
> +                       return kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_S_EXT);
> +       }
> +
>         return -ENOIOCTLCMD;
>  }
>
> @@ -213,18 +229,103 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
>         return -EINVAL;
>  }
>
> +void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu)
> +{
> +       struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
> +       unsigned long mask, val;
> +
> +       if (READ_ONCE(vcpu->arch.irqs_pending_mask)) {
> +               mask = xchg_acquire(&vcpu->arch.irqs_pending_mask, 0);
> +               val = READ_ONCE(vcpu->arch.irqs_pending) & mask;
> +
> +               csr->vsip &= ~mask;
> +               csr->vsip |= val;
> +       }
> +}
> +
> +void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu)
> +{
> +       vcpu->arch.guest_csr.vsip = csr_read(CSR_VSIP);
> +       vcpu->arch.guest_csr.vsie = csr_read(CSR_VSIE);
> +}
> +
> +int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
> +{
> +       if (irq != IRQ_S_SOFT &&
> +           irq != IRQ_S_TIMER &&
> +           irq != IRQ_S_EXT)
> +               return -EINVAL;
> +
> +       set_bit(irq, &vcpu->arch.irqs_pending);
> +       smp_mb__before_atomic();
> +       set_bit(irq, &vcpu->arch.irqs_pending_mask);
> +
> +       kvm_vcpu_kick(vcpu);
> +
> +       return 0;
> +}
> +
> +int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
> +{
> +       if (irq != IRQ_S_SOFT &&
> +           irq != IRQ_S_TIMER &&
> +           irq != IRQ_S_EXT)
> +               return -EINVAL;
> +
> +       clear_bit(irq, &vcpu->arch.irqs_pending);
> +       smp_mb__before_atomic();
> +       set_bit(irq, &vcpu->arch.irqs_pending_mask);
> +
> +       return 0;
> +}
> +
> +bool kvm_riscv_vcpu_has_interrupt(struct kvm_vcpu *vcpu)
> +{
> +       return (READ_ONCE(vcpu->arch.irqs_pending) &
> +               vcpu->arch.guest_csr.vsie) ? true : false;
> +}
> +
> +void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu)
> +{
> +       vcpu->arch.power_off = true;
> +       kvm_make_request(KVM_REQ_SLEEP, vcpu);
> +       kvm_vcpu_kick(vcpu);
> +}
> +
> +void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu)
> +{
> +       vcpu->arch.power_off = false;
> +       kvm_vcpu_wake_up(vcpu);
> +}
> +
>  int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
>                                     struct kvm_mp_state *mp_state)
>  {
> -       /* TODO: */
> +       if (vcpu->arch.power_off)
> +               mp_state->mp_state = KVM_MP_STATE_STOPPED;
> +       else
> +               mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
> +
>         return 0;
>  }
>
>  int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
>                                     struct kvm_mp_state *mp_state)
>  {
> -       /* TODO: */
> -       return 0;
> +       int ret = 0;
> +
> +       switch (mp_state->mp_state) {
> +       case KVM_MP_STATE_RUNNABLE:
> +               vcpu->arch.power_off = false;
> +               break;
> +       case KVM_MP_STATE_STOPPED:
> +               kvm_riscv_vcpu_power_off(vcpu);
> +               break;
> +       default:
> +               ret = -EINVAL;
> +       }
> +
> +       return ret;
>  }
>
>  int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
> @@ -248,7 +349,37 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
>
>  static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu)
>  {
> -       /* TODO: */
> +       struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
> +
> +       if (kvm_request_pending(vcpu)) {
> +               if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) {
> +                       swait_event_interruptible_exclusive(*wq,
> +                                               ((!vcpu->arch.power_off) &&
> +                                                (!vcpu->arch.pause)));
> +
> +                       if (vcpu->arch.power_off || vcpu->arch.pause) {
> +                               /*
> +                                * Awakened to handle a signal; request
> +                                * sleep again later.
> +                                */
> +                               kvm_make_request(KVM_REQ_SLEEP, vcpu);
> +                       }
> +               }
> +
> +               if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu))
> +                       kvm_riscv_reset_vcpu(vcpu);
> +       }
> +}
> +
> +static void kvm_riscv_update_vsip(struct kvm_vcpu *vcpu)
> +{
> +       struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
> +       unsigned long *vsip = this_cpu_ptr(&vsip_shadow);
> +
> +       if (*vsip != csr->vsip) {
> +               csr_write(CSR_VSIP, csr->vsip);
> +               *vsip = csr->vsip;
> +       }
>  }
>
>  int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
> @@ -311,6 +442,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
>         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
>         smp_mb__after_srcu_read_unlock();
>
> +       /*
> +        * We might have had VCPU interrupts updated asynchronously,
> +        * so update them in HW.
> +        */
> +       kvm_riscv_vcpu_flush_interrupts(vcpu);
> +
> +       /* Update the VSIP CSR for the current CPU */
> +       kvm_riscv_update_vsip(vcpu);
> +
>         if (ret <= 0 ||
>             kvm_request_pending(vcpu)) {
>                 vcpu->mode = OUTSIDE_GUEST_MODE;
> @@ -334,6 +474,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
>         scause = csr_read(CSR_SCAUSE);
>         stval = csr_read(CSR_STVAL);
>
> +       /* Sync up interrupt state with HW */
> +       kvm_riscv_vcpu_sync_interrupts(vcpu);
> +
>         /*
>          * We may have taken a host interrupt in VS/VU-mode (i.e.
>          * while executing the guest). This interrupt is still
> --
> 2.17.1
>

Regards,
Anup
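For illustration, a minimal user-space sketch of the KVM_INTERRUPT and
MP-state ioctls described in the commit message. It assumes vcpu_fd was
obtained through the usual /dev/kvm -> KVM_CREATE_VM -> KVM_CREATE_VCPU
sequence, and that KVM_INTERRUPT_SET/KVM_INTERRUPT_UNSET come from this
patch's asm/kvm.h; the helper names are hypothetical:

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Assert or de-assert the supervisor external interrupt (IRQ_S_EXT);
 * this maps to the KVM_INTERRUPT handling in kvm_arch_vcpu_async_ioctl().
 */
static int set_sext_irq(int vcpu_fd, int level)
{
	struct kvm_interrupt irq = {
		.irq = level ? KVM_INTERRUPT_SET : KVM_INTERRUPT_UNSET,
	};

	return ioctl(vcpu_fd, KVM_INTERRUPT, &irq);
}

/* Power a VCPU off; setting KVM_MP_STATE_RUNNABLE instead would
 * power it back on via kvm_arch_vcpu_ioctl_set_mpstate().
 */
static int stop_vcpu(int vcpu_fd)
{
	struct kvm_mp_state state = { .mp_state = KVM_MP_STATE_STOPPED };

	return ioctl(vcpu_fd, KVM_SET_MP_STATE, &state);
}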