On 02/08/19 09:47, Anup Patel wrote: > +static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu) > +{ > + if (kvm_request_pending(vcpu)) { > + /* TODO: */ > + > + /* > + * Clear IRQ_PENDING requests that were made to guarantee > + * that a VCPU sees new virtual interrupts. > + */ > + kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu); > + } > +} This kvm_check_request can go away (as it does in patch 6). > +int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) > +{ > + int ret; > + unsigned long scause, stval; You need to wrap this with srcu_read_lock/srcu_read_unlock, otherwise stage2_page_fault can access freed memslot arrays. (ARM doesn't have this issue because it does not have to decode instructions on MMIO faults). That is, vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); > + /* Process MMIO value returned from user-space */ > + if (run->exit_reason == KVM_EXIT_MMIO) { > + ret = kvm_riscv_vcpu_mmio_return(vcpu, vcpu->run); > + if (ret) > + return ret; > + } > + > + if (run->immediate_exit) > + return -EINTR; > + > + vcpu_load(vcpu); > + > + kvm_sigset_activate(vcpu); > + > + ret = 1; > + run->exit_reason = KVM_EXIT_UNKNOWN; > + while (ret > 0) { > + /* Check conditions before entering the guest */ > + cond_resched(); > + > + kvm_riscv_check_vcpu_requests(vcpu); > + > + preempt_disable(); > + > + local_irq_disable(); > + > + /* > + * Exit if we have a signal pending so that we can deliver > + * the signal to user space. > + */ > + if (signal_pending(current)) { > + ret = -EINTR; > + run->exit_reason = KVM_EXIT_INTR; > + } Add an srcu_read_unlock here (and then the smp_store_mb can become smp_mb__after_srcu_read_unlock + WRITE_ONCE). > + /* > + * Ensure we set mode to IN_GUEST_MODE after we disable > + * interrupts and before the final VCPU requests check. > + * See the comment in kvm_vcpu_exiting_guest_mode() and > + * Documentation/virtual/kvm/vcpu-requests.rst > + */ > + smp_store_mb(vcpu->mode, IN_GUEST_MODE); > + > + if (ret <= 0 || > + kvm_request_pending(vcpu)) { > + vcpu->mode = OUTSIDE_GUEST_MODE; > + local_irq_enable(); > + preempt_enable(); > + continue; > + } > + > + guest_enter_irqoff(); > + > + __kvm_riscv_switch_to(&vcpu->arch); > + > + vcpu->mode = OUTSIDE_GUEST_MODE; > + vcpu->stat.exits++; > + > + /* Save SCAUSE and STVAL because we might get an interrupt > + * between __kvm_riscv_switch_to() and local_irq_enable() > + * which can potentially overwrite SCAUSE and STVAL. > + */ > + scause = csr_read(CSR_SCAUSE); > + stval = csr_read(CSR_STVAL); > + > + /* > + * We may have taken a host interrupt in VS/VU-mode (i.e. > + * while executing the guest). This interrupt is still > + * pending, as we haven't serviced it yet! > + * > + * We're now back in HS-mode with interrupts disabled > + * so enabling the interrupts now will have the effect > + * of taking the interrupt again, in HS-mode this time. > + */ > + local_irq_enable(); > + > + /* > + * We do local_irq_enable() before calling guest_exit() so > + * that if a timer interrupt hits while running the guest > + * we account that tick as being spent in the guest. We > + * enable preemption after calling guest_exit() so that if > + * we get preempted we make sure ticks after that is not > + * counted as guest time. > + */ > + guest_exit(); > + > + preempt_enable(); And another srcu_read_lock here. Using vcpu->srcu_idx instead of a local variable also allows system_opcode_insn to wrap kvm_vcpu_block with a srcu_read_unlock/srcu_read_lock pair. > + ret = kvm_riscv_vcpu_exit(vcpu, run, scause, stval); > + } > + > + kvm_sigset_deactivate(vcpu); And finally srcu_read_unlock here. Paolo > + vcpu_put(vcpu); > + return ret; > +} > diff --git a/arch/riscv/kvm/vcpu_exit.c b/arch/riscv/kvm/vcpu_exit.c > new file mode 100644 > index 000000000000..e4d7c8f0807a > --- /dev/null > +++ b/arch/riscv/kvm/vcpu_exit.c > @@ -0,0 +1,35 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * Copyright (C) 2019 Western Digital Corporation or its affiliates. > + * > + * Authors: > + * Anup Patel <anup.patel@xxxxxxx> > + */ > + > +#include <linux/errno.h> > +#include <linux/err.h> > +#include <linux/kvm_host.h> > + > +/** > + * kvm_riscv_vcpu_mmio_return -- Handle MMIO loads after user space emulation > + * or in-kernel IO emulation > + * > + * @vcpu: The VCPU pointer > + * @run: The VCPU run struct containing the mmio data > + */ > +int kvm_riscv_vcpu_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) > +{ > + /* TODO: */ > + return 0; > +} > + > +/* > + * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on > + * proper exit to userspace. > + */ > +int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, > + unsigned long scause, unsigned long stval) > +{ > + /* TODO: */ > + return 0; > +} > diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c > new file mode 100644 > index 000000000000..ac0211820521 > --- /dev/null > +++ b/arch/riscv/kvm/vm.c > @@ -0,0 +1,79 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * Copyright (C) 2019 Western Digital Corporation or its affiliates. > + * > + * Authors: > + * Anup Patel <anup.patel@xxxxxxx> > + */ > + > +#include <linux/errno.h> > +#include <linux/err.h> > +#include <linux/module.h> > +#include <linux/uaccess.h> > +#include <linux/kvm_host.h> > + > +int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) > +{ > + /* TODO: To be added later. */ > + return -ENOTSUPP; > +} > + > +int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) > +{ > + int r; > + > + r = kvm_riscv_stage2_alloc_pgd(kvm); > + if (r) > + return r; > + > + return 0; > +} > + > +void kvm_arch_destroy_vm(struct kvm *kvm) > +{ > + int i; > + > + for (i = 0; i < KVM_MAX_VCPUS; ++i) { > + if (kvm->vcpus[i]) { > + kvm_arch_vcpu_destroy(kvm->vcpus[i]); > + kvm->vcpus[i] = NULL; > + } > + } > +} > + > +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) > +{ > + int r; > + > + switch (ext) { > + case KVM_CAP_DEVICE_CTRL: > + case KVM_CAP_USER_MEMORY: > + case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: > + case KVM_CAP_ONE_REG: > + case KVM_CAP_READONLY_MEM: > + case KVM_CAP_MP_STATE: > + case KVM_CAP_IMMEDIATE_EXIT: > + r = 1; > + break; > + case KVM_CAP_NR_VCPUS: > + r = num_online_cpus(); > + break; > + case KVM_CAP_MAX_VCPUS: > + r = KVM_MAX_VCPUS; > + break; > + case KVM_CAP_NR_MEMSLOTS: > + r = KVM_USER_MEM_SLOTS; > + break; > + default: > + r = 0; > + break; > + } > + > + return r; > +} > + > +long kvm_arch_vm_ioctl(struct file *filp, > + unsigned int ioctl, unsigned long arg) > +{ > + return -EINVAL; > +} >