On 24/06/2019 13:42, Julien Thierry wrote: > > > On 06/21/2019 10:37 AM, Marc Zyngier wrote: >> From: Andre Przywara <andre.przywara@xxxxxxx> >> >> KVM internally uses accessor functions when reading or writing the >> guest's system registers. This takes care of accessing either the stored >> copy or using the "live" EL1 system registers when the host uses VHE. >> >> With the introduction of virtual EL2 we add a bunch of EL2 system >> registers, which now must also be taken care of: >> - If the guest is running in vEL2, and we access an EL1 sysreg, we must >> revert to the stored version of that, and not use the CPU's copy. >> - If the guest is running in vEL1, and we access an EL2 sysreg, we must >> also use the stored version, since the CPU carries the EL1 copy. >> - Some EL2 system registers are supposed to affect the current execution >> of the system, so we need to put them into their respective EL1 >> counterparts. For this we need to define a mapping between the two. >> This is done using the newly introduced struct el2_sysreg_map. >> - Some EL2 system registers have a different format than their EL1 >> counterpart, so we need to translate them before writing them to the >> CPU. This is done using an (optional) translate function in the map. >> - There are the three special registers SP_EL2, SPSR_EL2 and ELR_EL2, >> which need some separate handling. >> >> All of these cases are now wrapped into the existing accessor functions, >> so KVM users wouldn't need to care whether they access EL2 or EL1 >> registers and also which state the guest is in. >> >> This handles what was formerly known as the "shadow state" dynamically, >> without requiring a separate copy for each vCPU EL. >> >> Signed-off-by: Andre Przywara <andre.przywara@xxxxxxx> >> Signed-off-by: Marc Zyngier <marc.zyngier@xxxxxxx> >> --- >> arch/arm64/include/asm/kvm_emulate.h | 6 + >> arch/arm64/include/asm/kvm_host.h | 5 + >> arch/arm64/kvm/sys_regs.c | 163 +++++++++++++++++++++++++++ >> 3 files changed, 174 insertions(+) >> >> diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h >> index c43aac5fed69..f37006b6eec4 100644 >> --- a/arch/arm64/include/asm/kvm_emulate.h >> +++ b/arch/arm64/include/asm/kvm_emulate.h >> @@ -70,6 +70,12 @@ void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu); >> int kvm_inject_nested_sync(struct kvm_vcpu *vcpu, u64 esr_el2); >> int kvm_inject_nested_irq(struct kvm_vcpu *vcpu); >> >> +u64 translate_tcr(u64 tcr); >> +u64 translate_cptr(u64 tcr); >> +u64 translate_sctlr(u64 tcr); >> +u64 translate_ttbr0(u64 tcr); >> +u64 translate_cnthctl(u64 tcr); >> + >> static inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) >> { >> return !(vcpu->arch.hcr_el2 & HCR_RW); >> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h >> index 2d4290d2513a..dae9c42a7219 100644 >> --- a/arch/arm64/include/asm/kvm_host.h >> +++ b/arch/arm64/include/asm/kvm_host.h >> @@ -217,6 +217,11 @@ enum vcpu_sysreg { >> NR_SYS_REGS /* Nothing after this line! */ >> }; >> >> +static inline bool sysreg_is_el2(int reg) >> +{ >> + return reg >= FIRST_EL2_SYSREG && reg < NR_SYS_REGS; >> +} >> + >> /* 32bit mapping */ >> #define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */ >> #define c0_CSSELR (CSSELR_EL1 * 2)/* Cache Size Selection Register */ >> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c >> index 693dd063c9c2..d024114da162 100644 >> --- a/arch/arm64/kvm/sys_regs.c >> +++ b/arch/arm64/kvm/sys_regs.c >> @@ -76,11 +76,142 @@ static bool write_to_read_only(struct kvm_vcpu *vcpu, >> return false; >> } >> >> +static u64 tcr_el2_ips_to_tcr_el1_ps(u64 tcr_el2) >> +{ >> + return ((tcr_el2 & TCR_EL2_PS_MASK) >> TCR_EL2_PS_SHIFT) >> + << TCR_IPS_SHIFT; >> +} >> + >> +u64 translate_tcr(u64 tcr) >> +{ >> + return TCR_EPD1_MASK | /* disable TTBR1_EL1 */ >> + ((tcr & TCR_EL2_TBI) ? TCR_TBI0 : 0) | >> + tcr_el2_ips_to_tcr_el1_ps(tcr) | >> + (tcr & TCR_EL2_TG0_MASK) | >> + (tcr & TCR_EL2_ORGN0_MASK) | >> + (tcr & TCR_EL2_IRGN0_MASK) | >> + (tcr & TCR_EL2_T0SZ_MASK); >> +} >> + >> +u64 translate_cptr(u64 cptr_el2) >> +{ >> + u64 cpacr_el1 = 0; >> + >> + if (!(cptr_el2 & CPTR_EL2_TFP)) >> + cpacr_el1 |= CPACR_EL1_FPEN; >> + if (cptr_el2 & CPTR_EL2_TTA) >> + cpacr_el1 |= CPACR_EL1_TTA; >> + if (!(cptr_el2 & CPTR_EL2_TZ)) >> + cpacr_el1 |= CPACR_EL1_ZEN; >> + >> + return cpacr_el1; >> +} >> + >> +u64 translate_sctlr(u64 sctlr) >> +{ >> + /* Bit 20 is RES1 in SCTLR_EL1, but RES0 in SCTLR_EL2 */ >> + return sctlr | BIT(20); >> +} >> + >> +u64 translate_ttbr0(u64 ttbr0) >> +{ >> + /* Force ASID to 0 (ASID 0 or RES0) */ >> + return ttbr0 & ~GENMASK_ULL(63, 48); >> +} >> + >> +u64 translate_cnthctl(u64 cnthctl) >> +{ >> + return ((cnthctl & 0x3) << 10) | (cnthctl & 0xfc); >> +} >> + >> +#define EL2_SYSREG(el2, el1, translate) \ >> + [el2 - FIRST_EL2_SYSREG] = { el2, el1, translate } >> +#define PURE_EL2_SYSREG(el2) \ >> + [el2 - FIRST_EL2_SYSREG] = { el2,__INVALID_SYSREG__, NULL } >> +/* >> + * Associate vEL2 registers to their EL1 counterparts on the CPU. >> + * The translate function can be NULL, when the register layout is identical. >> + */ >> +struct el2_sysreg_map { >> + int sysreg; /* EL2 register index into the array above */ >> + int mapping; /* associated EL1 register */ >> + u64 (*translate)(u64 value); >> +} nested_sysreg_map[NR_SYS_REGS - FIRST_EL2_SYSREG] = { >> + PURE_EL2_SYSREG( VPIDR_EL2 ), >> + PURE_EL2_SYSREG( VMPIDR_EL2 ), >> + PURE_EL2_SYSREG( ACTLR_EL2 ), >> + PURE_EL2_SYSREG( HCR_EL2 ), >> + PURE_EL2_SYSREG( MDCR_EL2 ), >> + PURE_EL2_SYSREG( HSTR_EL2 ), >> + PURE_EL2_SYSREG( HACR_EL2 ), >> + PURE_EL2_SYSREG( VTTBR_EL2 ), >> + PURE_EL2_SYSREG( VTCR_EL2 ), >> + PURE_EL2_SYSREG( RVBAR_EL2 ), >> + PURE_EL2_SYSREG( RMR_EL2 ), >> + PURE_EL2_SYSREG( TPIDR_EL2 ), >> + PURE_EL2_SYSREG( CNTVOFF_EL2 ), >> + PURE_EL2_SYSREG( CNTHCTL_EL2 ), >> + PURE_EL2_SYSREG( HPFAR_EL2 ), >> + EL2_SYSREG( SCTLR_EL2, SCTLR_EL1, translate_sctlr ), >> + EL2_SYSREG( CPTR_EL2, CPACR_EL1, translate_cptr ), >> + EL2_SYSREG( TTBR0_EL2, TTBR0_EL1, translate_ttbr0 ), >> + EL2_SYSREG( TTBR1_EL2, TTBR1_EL1, NULL ), >> + EL2_SYSREG( TCR_EL2, TCR_EL1, translate_tcr ), >> + EL2_SYSREG( VBAR_EL2, VBAR_EL1, NULL ), >> + EL2_SYSREG( AFSR0_EL2, AFSR0_EL1, NULL ), >> + EL2_SYSREG( AFSR1_EL2, AFSR1_EL1, NULL ), >> + EL2_SYSREG( ESR_EL2, ESR_EL1, NULL ), >> + EL2_SYSREG( FAR_EL2, FAR_EL1, NULL ), >> + EL2_SYSREG( MAIR_EL2, MAIR_EL1, NULL ), >> + EL2_SYSREG( AMAIR_EL2, AMAIR_EL1, NULL ), >> +}; >> + >> +static >> +const struct el2_sysreg_map *find_el2_sysreg(const struct el2_sysreg_map *map, >> + int reg) >> +{ >> + const struct el2_sysreg_map *entry; >> + >> + if (!sysreg_is_el2(reg)) >> + return NULL; >> + >> + entry = &nested_sysreg_map[reg - FIRST_EL2_SYSREG]; >> + if (entry->sysreg == __INVALID_SYSREG__) >> + return NULL; >> + >> + return entry; >> +} >> + >> u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) >> { >> + >> if (!vcpu->arch.sysregs_loaded_on_cpu) >> goto immediate_read; >> >> + if (unlikely(sysreg_is_el2(reg))) { >> + const struct el2_sysreg_map *el2_reg; >> + >> + if (!is_hyp_ctxt(vcpu)) >> + goto immediate_read; >> + >> + el2_reg = find_el2_sysreg(nested_sysreg_map, reg); >> + if (el2_reg) { >> + /* >> + * If this register does not have an EL1 counterpart, >> + * then read the stored EL2 version. >> + */ >> + if (el2_reg->mapping == __INVALID_SYSREG__) > > In this patch, find_el2_sysreg returns NULL for PURE_EL2 registers. So That's not how I read this code. You get NULL if the the EL2 sysreg is set to __INVALID_SYSREG__, of which there is no occurrence (yeah, dead code). > for PURE_EL2, the access would go through the switch case. However this > branch suggest that for PURE_EL2 register we intend to do the read from > the memory backed version. > > Which should it be? My understanding of this code is that we're actually hitting memory here. Am I missing anything? Note that I'm actively refactoring this code as part of of the ARMv8.4-NV effort, hopefully making it a bit simpler: https://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git/commit/?h=kvm-arm64/nv-wip-5.2-rc6&id=ea93236776772ce08e0eab51d9b77a9197121fde > >> + goto immediate_read; >> + >> + /* Get the current version of the EL1 counterpart. */ >> + reg = el2_reg->mapping; >> + } >> + } else { >> + /* EL1 register can't be on the CPU if the guest is in vEL2. */ >> + if (unlikely(is_hyp_ctxt(vcpu))) >> + goto immediate_read; >> + } >> + >> /* >> * System registers listed in the switch are not saved on every >> * exit from the guest but are only saved on vcpu_put. >> @@ -114,6 +245,8 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) >> case DACR32_EL2: return read_sysreg_s(SYS_DACR32_EL2); >> case IFSR32_EL2: return read_sysreg_s(SYS_IFSR32_EL2); >> case DBGVCR32_EL2: return read_sysreg_s(SYS_DBGVCR32_EL2); >> + case SP_EL2: return read_sysreg(sp_el1); >> + case ELR_EL2: return read_sysreg_el1(SYS_ELR); >> } >> >> immediate_read: >> @@ -125,6 +258,34 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) >> if (!vcpu->arch.sysregs_loaded_on_cpu) >> goto immediate_write; >> >> + if (unlikely(sysreg_is_el2(reg))) { >> + const struct el2_sysreg_map *el2_reg; >> + >> + if (!is_hyp_ctxt(vcpu)) >> + goto immediate_write; >> + >> + /* Store the EL2 version in the sysregs array. */ >> + __vcpu_sys_reg(vcpu, reg) = val; >> + >> + el2_reg = find_el2_sysreg(nested_sysreg_map, reg); >> + if (el2_reg) { >> + /* Does this register have an EL1 counterpart? */ >> + if (el2_reg->mapping == __INVALID_SYSREG__) >> + return; > > As in the read case, this is never reached and we'll go through the > switch case. Same thing. That's the mapping that is evaluated, not the sysreg itself. Thanks, M. -- Jazz is not dead. It just smells funny...