> Am 23.08.2019 um 13:05 schrieb Anup Patel <anup@xxxxxxxxxxxxxx>: > >> On Fri, Aug 23, 2019 at 1:23 PM Alexander Graf <graf@xxxxxxxxxx> wrote: >> >>> On 22.08.19 10:46, Anup Patel wrote: >>> From: Atish Patra <atish.patra@xxxxxxx> >>> >>> The RISC-V hypervisor specification doesn't have any virtual timer >>> feature. >>> >>> Due to this, the guest VCPU timer will be programmed via SBI calls. >>> The host will use a separate hrtimer event for each guest VCPU to >>> provide timer functionality. We inject a virtual timer interrupt to >>> the guest VCPU whenever the guest VCPU hrtimer event expires. >>> >>> The following features are not supported yet and will be added in >>> future: >>> 1. A time offset to adjust guest time from host time >>> 2. A saved next event in guest vcpu for vm migration >> >> Implementing these 2 bits right now should be trivial. Why wait? > > We were waiting for HTIMEDELTA CSR to be merged so we > deferred this items. > >> >>> >>> Signed-off-by: Atish Patra <atish.patra@xxxxxxx> >>> Signed-off-by: Anup Patel <anup.patel@xxxxxxx> >>> Acked-by: Paolo Bonzini <pbonzini@xxxxxxxxxx> >>> Reviewed-by: Paolo Bonzini <pbonzini@xxxxxxxxxx> >>> --- >>> arch/riscv/include/asm/kvm_host.h | 4 + >>> arch/riscv/include/asm/kvm_vcpu_timer.h | 32 +++++++ >>> arch/riscv/kvm/Makefile | 2 +- >>> arch/riscv/kvm/vcpu.c | 6 ++ >>> arch/riscv/kvm/vcpu_timer.c | 106 ++++++++++++++++++++++++ >>> drivers/clocksource/timer-riscv.c | 8 ++ >>> include/clocksource/timer-riscv.h | 16 ++++ >>> 7 files changed, 173 insertions(+), 1 deletion(-) >>> create mode 100644 arch/riscv/include/asm/kvm_vcpu_timer.h >>> create mode 100644 arch/riscv/kvm/vcpu_timer.c >>> create mode 100644 include/clocksource/timer-riscv.h >>> >>> diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h >>> index ab33e59a3d88..d2a2e45eefc0 100644 >>> --- a/arch/riscv/include/asm/kvm_host.h >>> +++ b/arch/riscv/include/asm/kvm_host.h >>> @@ -12,6 +12,7 @@ >>> #include <linux/types.h> >>> #include <linux/kvm.h> >>> #include <linux/kvm_types.h> >>> +#include <asm/kvm_vcpu_timer.h> >>> >>> #ifdef CONFIG_64BIT >>> #define KVM_MAX_VCPUS (1U << 16) >>> @@ -167,6 +168,9 @@ struct kvm_vcpu_arch { >>> unsigned long irqs_pending; >>> unsigned long irqs_pending_mask; >>> >>> + /* VCPU Timer */ >>> + struct kvm_vcpu_timer timer; >>> + >>> /* MMIO instruction details */ >>> struct kvm_mmio_decode mmio_decode; >>> >>> diff --git a/arch/riscv/include/asm/kvm_vcpu_timer.h b/arch/riscv/include/asm/kvm_vcpu_timer.h >>> new file mode 100644 >>> index 000000000000..df67ea86988e >>> --- /dev/null >>> +++ b/arch/riscv/include/asm/kvm_vcpu_timer.h >>> @@ -0,0 +1,32 @@ >>> +/* SPDX-License-Identifier: GPL-2.0-only */ >>> +/* >>> + * Copyright (C) 2019 Western Digital Corporation or its affiliates. >>> + * >>> + * Authors: >>> + * Atish Patra <atish.patra@xxxxxxx> >>> + */ >>> + >>> +#ifndef __KVM_VCPU_RISCV_TIMER_H >>> +#define __KVM_VCPU_RISCV_TIMER_H >>> + >>> +#include <linux/hrtimer.h> >>> + >>> +#define VCPU_TIMER_PROGRAM_THRESHOLD_NS 1000 >>> + >>> +struct kvm_vcpu_timer { >>> + bool init_done; >>> + /* Check if the timer is programmed */ >>> + bool is_set; >>> + struct hrtimer hrt; >>> + /* Mult & Shift values to get nanosec from cycles */ >>> + u32 mult; >>> + u32 shift; >>> +}; >>> + >>> +int kvm_riscv_vcpu_timer_init(struct kvm_vcpu *vcpu); >>> +int kvm_riscv_vcpu_timer_deinit(struct kvm_vcpu *vcpu); >>> +int kvm_riscv_vcpu_timer_reset(struct kvm_vcpu *vcpu); >>> +int kvm_riscv_vcpu_timer_next_event(struct kvm_vcpu *vcpu, >>> + unsigned long ncycles); >> >> This function never gets called? > > It's called from SBI emulation. > >> >>> + >>> +#endif >>> diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile >>> index c0f57f26c13d..3e0c7558320d 100644 >>> --- a/arch/riscv/kvm/Makefile >>> +++ b/arch/riscv/kvm/Makefile >>> @@ -9,6 +9,6 @@ ccflags-y := -Ivirt/kvm -Iarch/riscv/kvm >>> kvm-objs := $(common-objs-y) >>> >>> kvm-objs += main.o vm.o vmid.o tlb.o mmu.o >>> -kvm-objs += vcpu.o vcpu_exit.o vcpu_switch.o >>> +kvm-objs += vcpu.o vcpu_exit.o vcpu_switch.o vcpu_timer.o >>> >>> obj-$(CONFIG_KVM) += kvm.o >>> diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c >>> index 6124077d154f..018fca436776 100644 >>> --- a/arch/riscv/kvm/vcpu.c >>> +++ b/arch/riscv/kvm/vcpu.c >>> @@ -54,6 +54,8 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu) >>> >>> memcpy(cntx, reset_cntx, sizeof(*cntx)); >>> >>> + kvm_riscv_vcpu_timer_reset(vcpu); >>> + >>> WRITE_ONCE(vcpu->arch.irqs_pending, 0); >>> WRITE_ONCE(vcpu->arch.irqs_pending_mask, 0); >>> } >>> @@ -108,6 +110,9 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) >>> cntx->hstatus |= HSTATUS_SP2P; >>> cntx->hstatus |= HSTATUS_SPV; >>> >>> + /* Setup VCPU timer */ >>> + kvm_riscv_vcpu_timer_init(vcpu); >>> + >>> /* Reset VCPU */ >>> kvm_riscv_reset_vcpu(vcpu); >>> >>> @@ -116,6 +121,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) >>> >>> void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) >>> { >>> + kvm_riscv_vcpu_timer_deinit(vcpu); >>> kvm_riscv_stage2_flush_cache(vcpu); >>> kmem_cache_free(kvm_vcpu_cache, vcpu); >>> } >>> diff --git a/arch/riscv/kvm/vcpu_timer.c b/arch/riscv/kvm/vcpu_timer.c >>> new file mode 100644 >>> index 000000000000..a45ca06e1aa6 >>> --- /dev/null >>> +++ b/arch/riscv/kvm/vcpu_timer.c >>> @@ -0,0 +1,106 @@ >>> +// SPDX-License-Identifier: GPL-2.0 >>> +/* >>> + * Copyright (C) 2019 Western Digital Corporation or its affiliates. >>> + * >>> + * Authors: >>> + * Atish Patra <atish.patra@xxxxxxx> >>> + */ >>> + >>> +#include <linux/errno.h> >>> +#include <linux/err.h> >>> +#include <linux/kvm_host.h> >>> +#include <clocksource/timer-riscv.h> >>> +#include <asm/csr.h> >>> +#include <asm/kvm_vcpu_timer.h> >>> + >>> +static enum hrtimer_restart kvm_riscv_vcpu_hrtimer_expired(struct hrtimer *h) >>> +{ >>> + struct kvm_vcpu_timer *t = container_of(h, struct kvm_vcpu_timer, hrt); >>> + struct kvm_vcpu *vcpu = container_of(t, struct kvm_vcpu, arch.timer); >>> + >>> + t->is_set = false; >>> + kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_S_TIMER); >>> + >>> + return HRTIMER_NORESTART; >>> +} >>> + >>> +static u64 kvm_riscv_delta_cycles2ns(u64 cycles, struct kvm_vcpu_timer *t) >>> +{ >>> + unsigned long flags; >>> + u64 cycles_now, cycles_delta, delta_ns; >>> + >>> + local_irq_save(flags); >>> + cycles_now = get_cycles64(); >>> + if (cycles_now < cycles) >>> + cycles_delta = cycles - cycles_now; >>> + else >>> + cycles_delta = 0; >>> + delta_ns = (cycles_delta * t->mult) >> t->shift; >>> + local_irq_restore(flags); >>> + >>> + return delta_ns; >>> +} >>> + >>> +static int kvm_riscv_vcpu_timer_cancel(struct kvm_vcpu_timer *t) >>> +{ >>> + if (!t->init_done || !t->is_set) >>> + return -EINVAL; >>> + >>> + hrtimer_cancel(&t->hrt); >>> + t->is_set = false; >>> + >>> + return 0; >>> +} >>> + >>> +int kvm_riscv_vcpu_timer_next_event(struct kvm_vcpu *vcpu, >>> + unsigned long ncycles) >>> +{ >>> + struct kvm_vcpu_timer *t = &vcpu->arch.timer; >>> + u64 delta_ns = kvm_riscv_delta_cycles2ns(ncycles, t); >> >> ... in fact, I feel like I'm missing something obvious here. How does >> the guest trigger the timer event? What is the argument it uses for that >> and how does that play with the tbfreq in the earlier patch? > > We have SBI call inferface between Hypervisor and Guest. One of the > SBI call allows Guest to program time event. The next event is specified > as absolute cycles. The Guest can read time using TIME CSR which > returns system timer value (@ tbfreq freqency). > > Guest Linux will know the tbfreq from DTB passed by QEMU/KVMTOOL > and it has to be same as Host tbfreq. > > The TBFREQ config register visible to user-space is a read-only CONFIG > register which tells user-space tools (QEMU/KVMTOOL) about Host tbfreq. And it's read-only because you can not trap on TB reads? Alex > > Regards, > Anup > >> >> >> Alex >>