Hi Marc, On Thu, Mar 30, 2023 at 06:47:45PM +0100, Marc Zyngier wrote: > CNTPOFF_EL2 is awesome, but it is mostly vapourware, and no publicly > available implementation has it. So for the common mortals, let's > implement the emulated version of this thing. > > It means trapping accesses to the physical counter and timer, and > emulate some of it as necessary. > > As for CNTPOFF_EL2, nobody sets the offset yet. > > Reviewed-by: Colton Lewis <coltonlewis@xxxxxxxxxx> > Signed-off-by: Marc Zyngier <maz@xxxxxxxxxx> > --- > arch/arm64/include/asm/sysreg.h | 2 + > arch/arm64/kvm/arch_timer.c | 98 +++++++++++++++++++++++------- > arch/arm64/kvm/hyp/nvhe/timer-sr.c | 18 ++++-- > arch/arm64/kvm/sys_regs.c | 9 +++ > 4 files changed, 98 insertions(+), 29 deletions(-) > > diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h > index 9e3ecba3c4e6..f8da9e1b0c11 100644 > --- a/arch/arm64/include/asm/sysreg.h > +++ b/arch/arm64/include/asm/sysreg.h > @@ -388,6 +388,7 @@ > > #define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0) > > +#define SYS_CNTPCT_EL0 sys_reg(3, 3, 14, 0, 1) > #define SYS_CNTPCTSS_EL0 sys_reg(3, 3, 14, 0, 5) > #define SYS_CNTVCTSS_EL0 sys_reg(3, 3, 14, 0, 6) > > @@ -400,6 +401,7 @@ > > #define SYS_AARCH32_CNTP_TVAL sys_reg(0, 0, 14, 2, 0) > #define SYS_AARCH32_CNTP_CTL sys_reg(0, 0, 14, 2, 1) > +#define SYS_AARCH32_CNTPCT sys_reg(0, 0, 0, 14, 0) > #define SYS_AARCH32_CNTP_CVAL sys_reg(0, 2, 0, 14, 0) > > #define __PMEV_op2(n) ((n) & 0x7) > diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c > index 3118ea0a1b41..bb64a71ae193 100644 > --- a/arch/arm64/kvm/arch_timer.c > +++ b/arch/arm64/kvm/arch_timer.c > @@ -458,6 +458,8 @@ static void timer_save_state(struct arch_timer_context *ctx) > goto out; > > switch (index) { > + u64 cval; > + > case TIMER_VTIMER: > timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTV_CTL)); > timer_set_cval(ctx, read_sysreg_el0(SYS_CNTV_CVAL)); > @@ -485,7 +487,12 @@ static void timer_save_state(struct arch_timer_context *ctx) > break; > case TIMER_PTIMER: > timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL)); > - timer_set_cval(ctx, read_sysreg_el0(SYS_CNTP_CVAL)); > + cval = read_sysreg_el0(SYS_CNTP_CVAL); > + > + if (!has_cntpoff()) > + cval -= timer_get_offset(ctx); > + > + timer_set_cval(ctx, cval); > > /* Disable the timer */ > write_sysreg_el0(0, SYS_CNTP_CTL); > @@ -555,6 +562,8 @@ static void timer_restore_state(struct arch_timer_context *ctx) > goto out; > > switch (index) { > + u64 cval, offset; > + > case TIMER_VTIMER: > set_cntvoff(timer_get_offset(ctx)); > write_sysreg_el0(timer_get_cval(ctx), SYS_CNTV_CVAL); > @@ -562,8 +571,12 @@ static void timer_restore_state(struct arch_timer_context *ctx) > write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL); > break; > case TIMER_PTIMER: > - set_cntpoff(timer_get_offset(ctx)); > - write_sysreg_el0(timer_get_cval(ctx), SYS_CNTP_CVAL); > + cval = timer_get_cval(ctx); > + offset = timer_get_offset(ctx); > + set_cntpoff(offset); > + if (!has_cntpoff()) > + cval += offset; > + write_sysreg_el0(cval, SYS_CNTP_CVAL); > isb(); > write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL); > break; > @@ -634,6 +647,61 @@ static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu) > enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); > } > > +/* If _pred is true, set bit in _set, otherwise set it in _clr */ > +#define assign_clear_set_bit(_pred, _bit, _clr, _set) \ > + do { \ > + if (_pred) \ > + (_set) |= (_bit); \ > + else \ > + (_clr) |= (_bit); \ > + } while (0) > + > +static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map) > +{ > + bool tpt, tpc; > + u64 clr, set; > + > + /* > + * No trapping gets configured here with nVHE. See > + * __timer_enable_traps(), which is where the stuff happens. > + */ > + if (!has_vhe()) > + return; > + > + /* > + * Our default policy is not to trap anything. As we progress > + * within this function, reality kicks in and we start adding > + * traps based on emulation requirements. > + */ > + tpt = tpc = false; > + > + /* > + * We have two possibility to deal with a physical offset: > + * > + * - Either we have CNTPOFF (yay!) or the offset is 0: > + * we let the guest freely access the HW > + * > + * - or neither of these condition apply: > + * we trap accesses to the HW, but still use it > + * after correcting the physical offset > + */ > + if (!has_cntpoff() && timer_get_offset(map->direct_ptimer)) > + tpt = tpc = true; > + > + /* > + * Now that we have collected our requirements, compute the > + * trap and enable bits. > + */ > + set = 0; > + clr = 0; > + > + assign_clear_set_bit(tpt, CNTHCTL_EL1PCEN << 10, set, clr); > + assign_clear_set_bit(tpc, CNTHCTL_EL1PCTEN << 10, set, clr); Nit: IMHO the way the code specifies the 'set' and 'clr' arguments for the macro might be a bit confusing ('set' is for '_clr', and 'clr' is for '_set')? Perhaps changing the parameter names of assign_clear_set_bit() like below or flipping the condition (i.e. Specify !tpt or no_tpt instead of tpt) might be less confusing? #define assign_clear_set_bit(_pred, _bit, _t_val, _f_val) \ do { \ if (_pred) \ (_t_val) |= (_bit); \ else \ (_f_val) |= (_bit); \ } while (0) > + > + /* This only happens on VHE, so use the CNTKCTL_EL1 accessor */ > + sysreg_clear_set(cntkctl_el1, clr, set); > +} > + > void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) > { > struct arch_timer_cpu *timer = vcpu_timer(vcpu); > @@ -657,9 +725,10 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) > timer_restore_state(map.direct_vtimer); > if (map.direct_ptimer) > timer_restore_state(map.direct_ptimer); > - > if (map.emul_ptimer) > timer_emulate(map.emul_ptimer); > + > + timer_set_traps(vcpu, &map); > } > > bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu) > @@ -1292,28 +1361,11 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) > return 0; > } > > -/* > - * On VHE system, we only need to configure the EL2 timer trap register once, > - * not for every world switch. > - * The host kernel runs at EL2 with HCR_EL2.TGE == 1, > - * and this makes those bits have no effect for the host kernel execution. > - */ > +/* If we have CNTPOFF, permanently set ECV to enable it */ > void kvm_timer_init_vhe(void) > { > - /* When HCR_EL2.E2H ==1, EL1PCEN and EL1PCTEN are shifted by 10 */ > - u32 cnthctl_shift = 10; > - u64 val; > - > - /* > - * VHE systems allow the guest direct access to the EL1 physical > - * timer/counter. > - */ > - val = read_sysreg(cnthctl_el2); > - val |= (CNTHCTL_EL1PCEN << cnthctl_shift); > - val |= (CNTHCTL_EL1PCTEN << cnthctl_shift); > if (cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF)) > - val |= CNTHCTL_ECV; > - write_sysreg(val, cnthctl_el2); > + sysreg_clear_set(cntkctl_el1, 0, CNTHCTL_ECV); > } > > static void set_timer_irqs(struct kvm *kvm, int vtimer_irq, int ptimer_irq) > diff --git a/arch/arm64/kvm/hyp/nvhe/timer-sr.c b/arch/arm64/kvm/hyp/nvhe/timer-sr.c > index 9072e71693ba..b185ac0dbd47 100644 > --- a/arch/arm64/kvm/hyp/nvhe/timer-sr.c > +++ b/arch/arm64/kvm/hyp/nvhe/timer-sr.c > @@ -9,6 +9,7 @@ > #include <linux/kvm_host.h> > > #include <asm/kvm_hyp.h> > +#include <asm/kvm_mmu.h> > > void __kvm_timer_set_cntvoff(u64 cntvoff) > { > @@ -35,14 +36,19 @@ void __timer_disable_traps(struct kvm_vcpu *vcpu) > */ > void __timer_enable_traps(struct kvm_vcpu *vcpu) > { > - u64 val; > + u64 clr = 0, set = 0; > > /* > * Disallow physical timer access for the guest > - * Physical counter access is allowed > + * Physical counter access is allowed if no offset is enforced > + * or running protected (we don't offset anything in this case). > */ > - val = read_sysreg(cnthctl_el2); > - val &= ~CNTHCTL_EL1PCEN; > - val |= CNTHCTL_EL1PCTEN; > - write_sysreg(val, cnthctl_el2); > + clr = CNTHCTL_EL1PCEN; Nit: Perhaps "clr |= CNTHCTL_EL1PCEN;" would be more straightforward (or initialize 'clr' to CNTHCTL_EL1PCEN ?). > + if (is_protected_kvm_enabled() || > + !kern_hyp_va(vcpu->kvm)->arch.timer_data.poffset) > + set |= CNTHCTL_EL1PCTEN; > + else > + clr |= CNTHCTL_EL1PCTEN; > + > + sysreg_clear_set(cnthctl_el2, clr, set); > } > diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c > index 53749d3a0996..be7c2598e563 100644 > --- a/arch/arm64/kvm/sys_regs.c > +++ b/arch/arm64/kvm/sys_regs.c > @@ -1139,6 +1139,12 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu, > tmr = TIMER_PTIMER; > treg = TIMER_REG_CVAL; > break; > + case SYS_CNTPCT_EL0: > + case SYS_CNTPCTSS_EL0: > + case SYS_AARCH32_CNTPCT: > + tmr = TIMER_PTIMER; > + treg = TIMER_REG_CNT; > + break; > default: > print_sys_reg_msg(p, "%s", "Unhandled trapped timer register"); > kvm_inject_undefined(vcpu); > @@ -2075,6 +2081,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { > AMU_AMEVTYPER1_EL0(14), > AMU_AMEVTYPER1_EL0(15), > > + { SYS_DESC(SYS_CNTPCT_EL0), access_arch_timer }, > + { SYS_DESC(SYS_CNTPCTSS_EL0), access_arch_timer }, > { SYS_DESC(SYS_CNTP_TVAL_EL0), access_arch_timer }, > { SYS_DESC(SYS_CNTP_CTL_EL0), access_arch_timer }, > { SYS_DESC(SYS_CNTP_CVAL_EL0), access_arch_timer }, > @@ -2525,6 +2533,7 @@ static const struct sys_reg_desc cp15_64_regs[] = { > { Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, TTBR0_EL1 }, > { CP15_PMU_SYS_REG(DIRECT, 0, 0, 9, 0), .access = access_pmu_evcntr }, > { Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI1R */ > + { SYS_DESC(SYS_AARCH32_CNTPCT), access_arch_timer }, Shouldn't KVM also emulate CNTPCTSS (Aarch32) when its trapping is enabled on the host with ECV_CNTPOFF ? > { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, TTBR1_EL1 }, > { Op1( 1), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_ASGI1R */ > { Op1( 2), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI0R */ > -- > 2.34.1 > > Nit: In emulating reading physical counter/timer for direct_ptimer (poffset != 0 on VHE without ECV_CNTPOFF), it appears that kvm_arm_timer_read_sysreg() unnecessarily calls timer_{save,restore}_state(), and kvm_arm_timer_write_sysreg() unnecessarily calls timer_save_state(). Couldn't we skip those unnecessary calls ? (I didn't check all the following patches, but the current code would be more convenient in the following patches ?) Thank you, Reiji