Hi, On Fri, Jan 28, 2022 at 12:18:24PM +0000, Marc Zyngier wrote: > Whenever we need to restore the guest's system registers to the CPU, we > now need to take care of the EL2 system registers as well. Most of them > are accessed via traps only, but some have an immediate effect and also > a guest running in VHE mode would expect them to be accessible via their > EL1 encoding, which we do not trap. > > For vEL2 we write the virtual EL2 registers with an identical format directly > into their EL1 counterpart, and translate the few registers that have a > different format for the same effect on the execution when running a > non-VHE guest guest hypervisor. > > Based on an initial patch from Andre Przywara, rewritten many times > since. > > Signed-off-by: Marc Zyngier <maz@xxxxxxxxxx> > --- > arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 5 +- > arch/arm64/kvm/hyp/nvhe/sysreg-sr.c | 2 +- > arch/arm64/kvm/hyp/vhe/sysreg-sr.c | 125 ++++++++++++++++++++- > 3 files changed, 127 insertions(+), 5 deletions(-) > > diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h > index 7ecca8b07851..283f780f5f56 100644 > --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h > +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h > @@ -92,9 +92,10 @@ static inline void __sysreg_restore_user_state(struct kvm_cpu_context *ctxt) > write_sysreg(ctxt_sys_reg(ctxt, TPIDRRO_EL0), tpidrro_el0); > } > > -static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) > +static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt, > + u64 mpidr) > { > - write_sysreg(ctxt_sys_reg(ctxt, MPIDR_EL1), vmpidr_el2); > + write_sysreg(mpidr, vmpidr_el2); > write_sysreg(ctxt_sys_reg(ctxt, CSSELR_EL1), csselr_el1); > > if (has_vhe() || > diff --git a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c > index 29305022bc04..dba101565de3 100644 > --- a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c > +++ 
b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c > @@ -28,7 +28,7 @@ void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt) > > void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt) > { > - __sysreg_restore_el1_state(ctxt); > + __sysreg_restore_el1_state(ctxt, ctxt_sys_reg(ctxt, MPIDR_EL1)); > __sysreg_restore_common_state(ctxt); > __sysreg_restore_user_state(ctxt); > __sysreg_restore_el2_return_state(ctxt); > diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c > index 007a12dd4351..3e26a78d00c5 100644 > --- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c > +++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c > @@ -13,6 +13,96 @@ > #include <asm/kvm_asm.h> > #include <asm/kvm_emulate.h> > #include <asm/kvm_hyp.h> > +#include <asm/kvm_nested.h> > + > +static void __sysreg_save_vel2_state(struct kvm_cpu_context *ctxt) > +{ > + /* These registers are common with EL1 */ > + ctxt_sys_reg(ctxt, CSSELR_EL1) = read_sysreg(csselr_el1); > + ctxt_sys_reg(ctxt, PAR_EL1) = read_sysreg(par_el1); > + ctxt_sys_reg(ctxt, TPIDR_EL1) = read_sysreg(tpidr_el1); > + > + ctxt_sys_reg(ctxt, ESR_EL2) = read_sysreg_el1(SYS_ESR); > + ctxt_sys_reg(ctxt, AFSR0_EL2) = read_sysreg_el1(SYS_AFSR0); > + ctxt_sys_reg(ctxt, AFSR1_EL2) = read_sysreg_el1(SYS_AFSR1); > + ctxt_sys_reg(ctxt, FAR_EL2) = read_sysreg_el1(SYS_FAR); > + ctxt_sys_reg(ctxt, MAIR_EL2) = read_sysreg_el1(SYS_MAIR); > + ctxt_sys_reg(ctxt, VBAR_EL2) = read_sysreg_el1(SYS_VBAR); > + ctxt_sys_reg(ctxt, CONTEXTIDR_EL2) = read_sysreg_el1(SYS_CONTEXTIDR); > + ctxt_sys_reg(ctxt, AMAIR_EL2) = read_sysreg_el1(SYS_AMAIR); > + > + /* > + * In VHE mode those registers are compatible between EL1 and EL2, > + * and the guest uses the _EL1 versions on the CPU naturally. > + * So we save them into their _EL2 versions here. > + * For nVHE mode we trap accesses to those registers, so our > + * _EL2 copy in sys_regs[] is always up-to-date and we don't need > + * to save anything here. 
> + */ > + if (__vcpu_el2_e2h_is_set(ctxt)) { > + ctxt_sys_reg(ctxt, SCTLR_EL2) = read_sysreg_el1(SYS_SCTLR); > + ctxt_sys_reg(ctxt, CPTR_EL2) = read_sysreg_el1(SYS_CPACR); > + ctxt_sys_reg(ctxt, TTBR0_EL2) = read_sysreg_el1(SYS_TTBR0); > + ctxt_sys_reg(ctxt, TTBR1_EL2) = read_sysreg_el1(SYS_TTBR1); > + ctxt_sys_reg(ctxt, TCR_EL2) = read_sysreg_el1(SYS_TCR); > + ctxt_sys_reg(ctxt, CNTHCTL_EL2) = read_sysreg_el1(SYS_CNTKCTL); > + } > + > + ctxt_sys_reg(ctxt, SP_EL2) = read_sysreg(sp_el1); > + ctxt_sys_reg(ctxt, ELR_EL2) = read_sysreg_el1(SYS_ELR); > + ctxt_sys_reg(ctxt, SPSR_EL2) = __fixup_spsr_el2_read(ctxt, read_sysreg_el1(SYS_SPSR)); > +} > + > +static void __sysreg_restore_vel2_state(struct kvm_cpu_context *ctxt) > +{ > + u64 val; > + > + /* These registers are common with EL1 */ > + write_sysreg(ctxt_sys_reg(ctxt, CSSELR_EL1), csselr_el1); > + write_sysreg(ctxt_sys_reg(ctxt, PAR_EL1), par_el1); > + write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL1), tpidr_el1); > + > + write_sysreg(read_cpuid_id(), vpidr_el2); This is sneaky. The pseudocode for accessing MIDR_EL1 is: if PSTATE.EL == EL0 then [..] elsif PSTATE.EL == EL1 then if EL2Enabled() && (!HaveEL(EL3) || SCR_EL3.FGTEn == '1') && HFGRTR_EL2.MIDR_EL1 == '1' then AArch64.SystemAccessTrap(EL2, 0x18); elsif EL2Enabled() then return VPIDR_EL2; else return MIDR_EL1; elsif PSTATE.EL == EL2 then return MIDR_EL1; [..]