On 6/21/19 10:37 AM, Marc Zyngier wrote: > Extract the direct HW accessors for later reuse. > > Signed-off-by: Marc Zyngier <marc.zyngier@xxxxxxx> > --- > arch/arm64/kvm/sys_regs.c | 247 +++++++++++++++++++++----------------- > 1 file changed, 139 insertions(+), 108 deletions(-) > > diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c > index 2b8734f75a09..e181359adadf 100644 > --- a/arch/arm64/kvm/sys_regs.c > +++ b/arch/arm64/kvm/sys_regs.c > @@ -182,99 +182,161 @@ const struct el2_sysreg_map *find_el2_sysreg(const struct el2_sysreg_map *map, > return entry; > } > > +static bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val) > +{ > + /* > + * System registers listed in the switch are not saved on every > + * exit from the guest but are only saved on vcpu_put. > + * > + * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but > + * should never be listed below, because the guest cannot modify its > + * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's > + * thread when emulating cross-VCPU communication. > + */ > + switch (reg) { > + case CSSELR_EL1: *val = read_sysreg_s(SYS_CSSELR_EL1); break; > + case SCTLR_EL1: *val = read_sysreg_s(SYS_SCTLR_EL12); break; > + case ACTLR_EL1: *val = read_sysreg_s(SYS_ACTLR_EL1); break; > + case CPACR_EL1: *val = read_sysreg_s(SYS_CPACR_EL12); break; > + case TTBR0_EL1: *val = read_sysreg_s(SYS_TTBR0_EL12); break; > + case TTBR1_EL1: *val = read_sysreg_s(SYS_TTBR1_EL12); break; > + case TCR_EL1: *val = read_sysreg_s(SYS_TCR_EL12); break; > + case ESR_EL1: *val = read_sysreg_s(SYS_ESR_EL12); break; > + case AFSR0_EL1: *val = read_sysreg_s(SYS_AFSR0_EL12); break; > + case AFSR1_EL1: *val = read_sysreg_s(SYS_AFSR1_EL12); break; > + case FAR_EL1: *val = read_sysreg_s(SYS_FAR_EL12); break; > + case MAIR_EL1: *val = read_sysreg_s(SYS_MAIR_EL12); break; > + case VBAR_EL1: *val = read_sysreg_s(SYS_VBAR_EL12); break; > + case CONTEXTIDR_EL1: *val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break; > + case TPIDR_EL0: *val = read_sysreg_s(SYS_TPIDR_EL0); break; > + case TPIDRRO_EL0: *val = read_sysreg_s(SYS_TPIDRRO_EL0); break; > + case TPIDR_EL1: *val = read_sysreg_s(SYS_TPIDR_EL1); break; > + case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break; > + case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break; > + case PAR_EL1: *val = read_sysreg_s(SYS_PAR_EL1); break; > + case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break; > + case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break; > + case DBGVCR32_EL2: *val = read_sysreg_s(SYS_DBGVCR32_EL2); break; > + default: return false; > + } > + > + return true; > +} > + > +static bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg) > +{ > + /* > + * System registers listed in the switch are not restored on every > + * entry to the guest but are only restored on vcpu_load. > + * > + * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but > + * should never be listed below, because the the MPIDR should only be > + * set once, before running the VCPU, and never changed later. > + */ > + switch (reg) { > + case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); break; > + case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); break; > + case ACTLR_EL1: write_sysreg_s(val, SYS_ACTLR_EL1); break; > + case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); break; > + case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); break; > + case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); break; > + case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); break; > + case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); break; > + case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); break; > + case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); break; > + case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); break; > + case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); break; > + case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); break; > + case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break; > + case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); break; > + case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); break; > + case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); break; > + case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break; > + case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break; > + case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break; > + case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break; > + case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break; > + case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); break; > + default: return false; > + } > + > + return true; > +} > + > u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) > { > - u64 val; > + u64 val = 0x8badf00d8badf00d; > > if (!vcpu->arch.sysregs_loaded_on_cpu) > - goto immediate_read; > + goto memory_read; > > if (unlikely(sysreg_is_el2(reg))) { > const struct el2_sysreg_map *el2_reg; > > if (!is_hyp_ctxt(vcpu)) > - goto immediate_read; > + goto memory_read; > > switch (reg) { > + case ELR_EL2: > + return read_sysreg_el1(SYS_ELR); > case SPSR_EL2: > val = read_sysreg_el1(SYS_SPSR); > return __fixup_spsr_el2_read(&vcpu->arch.ctxt, val); > } > > el2_reg = find_el2_sysreg(nested_sysreg_map, reg); > - if (el2_reg) { > - /* > - * If this register does not have an EL1 counterpart, > - * then read the stored EL2 version. > - */ > - if (el2_reg->mapping == __INVALID_SYSREG__) > - goto immediate_read; > - > - /* Get the current version of the EL1 counterpart. */ > - reg = el2_reg->mapping; > - } > - } else { > - /* EL1 register can't be on the CPU if the guest is in vEL2. */ > - if (unlikely(is_hyp_ctxt(vcpu))) > - goto immediate_read; > + BUG_ON(!el2_reg); > + > + /* > + * If this register does not have an EL1 counterpart, > + * then read the stored EL2 version. > + */ > + if (el2_reg->mapping == __INVALID_SYSREG__) > + goto memory_read; > + > + if (!vcpu_el2_e2h_is_set(vcpu) && > + el2_reg->translate) > + goto memory_read; Nit: the condition can be written on one line. This condition wasn't present in patch 13 which introduced EL2 register handling, and I'm struggling to understand what it does. As I understand the code, this condition basically translates into: - if the register is one of SCTLR_EL2, TTBR0_EL2, CPTR_EL2 or TCR_EL2, then read it from memory. - if the register is an EL2 register whose value is written unmodified to the corresponding EL1 register, then read the corresponding EL1 register and return that value. Looking at vcpu_write_sys_reg, the values for the EL2 registers are always saved in memory. The guest is a non-vhe guest, so writes to EL1 registers shouldn't be reflected in the corresponding EL2 register. I think it's safe to always return the value from memory. I tried testing this with the following patch: diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 1235a88ec575..27d39bb9564d 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -290,6 +290,9 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) el2_reg = find_el2_sysreg(nested_sysreg_map, reg); BUG_ON(!el2_reg); + if (!vcpu_el2_e2h_is_set(vcpu)) + goto memory_read; + /* * If this register does not have an EL1 counterpart, * then read the stored EL2 version. @@ -297,10 +300,6 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) if (el2_reg->mapping == __INVALID_SYSREG__) goto memory_read; - if (!vcpu_el2_e2h_is_set(vcpu) && - el2_reg->translate) - goto memory_read; - /* Get the current version of the EL1 counterpart. */ reg = el2_reg->mapping; WARN_ON(!__vcpu_read_sys_reg_from_cpu(reg, &val)); I know it's not conclusive, but I was able to boot a L2 guest under a L1 non-vhe hypervisor. > + > + /* Get the current version of the EL1 counterpart. */ > + reg = el2_reg->mapping; > + WARN_ON(!__vcpu_read_sys_reg_from_cpu(reg, &val)); > + return val; > } > > - /* > - * System registers listed in the switch are not saved on every > - * exit from the guest but are only saved on vcpu_put. > - * > - * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but > - * should never be listed below, because the guest cannot modify its > - * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's > - * thread when emulating cross-VCPU communication. > - */ > - switch (reg) { > - case CSSELR_EL1: return read_sysreg_s(SYS_CSSELR_EL1); > - case SCTLR_EL1: return read_sysreg_s(SYS_SCTLR_EL12); > - case ACTLR_EL1: return read_sysreg_s(SYS_ACTLR_EL1); > - case CPACR_EL1: return read_sysreg_s(SYS_CPACR_EL12); > - case TTBR0_EL1: return read_sysreg_s(SYS_TTBR0_EL12); > - case TTBR1_EL1: return read_sysreg_s(SYS_TTBR1_EL12); > - case TCR_EL1: return read_sysreg_s(SYS_TCR_EL12); > - case ESR_EL1: return read_sysreg_s(SYS_ESR_EL12); > - case AFSR0_EL1: return read_sysreg_s(SYS_AFSR0_EL12); > - case AFSR1_EL1: return read_sysreg_s(SYS_AFSR1_EL12); > - case FAR_EL1: return read_sysreg_s(SYS_FAR_EL12); > - case MAIR_EL1: return read_sysreg_s(SYS_MAIR_EL12); > - case VBAR_EL1: return read_sysreg_s(SYS_VBAR_EL12); > - case CONTEXTIDR_EL1: return read_sysreg_s(SYS_CONTEXTIDR_EL12); > - case TPIDR_EL0: return read_sysreg_s(SYS_TPIDR_EL0); > - case TPIDRRO_EL0: return read_sysreg_s(SYS_TPIDRRO_EL0); > - case TPIDR_EL1: return read_sysreg_s(SYS_TPIDR_EL1); > - case AMAIR_EL1: return read_sysreg_s(SYS_AMAIR_EL12); > - case CNTKCTL_EL1: return read_sysreg_s(SYS_CNTKCTL_EL12); > - case PAR_EL1: return read_sysreg_s(SYS_PAR_EL1); > - case DACR32_EL2: return read_sysreg_s(SYS_DACR32_EL2); > - case IFSR32_EL2: return read_sysreg_s(SYS_IFSR32_EL2); > - case DBGVCR32_EL2: return read_sysreg_s(SYS_DBGVCR32_EL2); > - case SP_EL2: return read_sysreg(sp_el1); > - case ELR_EL2: return read_sysreg_el1(SYS_ELR); > - } > + /* EL1 register can't be on the CPU if the guest is in vEL2. */ > + if (unlikely(is_hyp_ctxt(vcpu))) > + goto memory_read; > + > + if (__vcpu_read_sys_reg_from_cpu(reg, &val)) > + return val; > > -immediate_read: > +memory_read: > return __vcpu_sys_reg(vcpu, reg); > } > > void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) > { > if (!vcpu->arch.sysregs_loaded_on_cpu) > - goto immediate_write; > + goto memory_write; > > if (unlikely(sysreg_is_el2(reg))) { > const struct el2_sysreg_map *el2_reg; > > if (!is_hyp_ctxt(vcpu)) > - goto immediate_write; > + goto memory_write; > > - /* Store the EL2 version in the sysregs array. */ > + /* > + * Always store a copy of the write to memory to avoid having > + * to reverse-translate virtual EL2 system registers for a > + * non-VHE guest hypervisor. > + */ > __vcpu_sys_reg(vcpu, reg) = val; > > switch (reg) { > + case ELR_EL2: > + write_sysreg_el1(val, SYS_ELR); > + return; > case SPSR_EL2: > val = __fixup_spsr_el2_write(&vcpu->arch.ctxt, val); > write_sysreg_el1(val, SYS_SPSR); > @@ -282,61 +344,30 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) > } > > el2_reg = find_el2_sysreg(nested_sysreg_map, reg); > - if (el2_reg) { > - /* Does this register have an EL1 counterpart? */ > - if (el2_reg->mapping == __INVALID_SYSREG__) > - return; > + WARN(!el2_reg, "reg: %d\n", reg); > > - if (!vcpu_el2_e2h_is_set(vcpu) && > - el2_reg->translate) > - val = el2_reg->translate(val); > + /* Does this register have an EL1 counterpart? */ > + if (el2_reg->mapping == __INVALID_SYSREG__) > + goto memory_write; > > - /* Redirect this to the EL1 version of the register. */ > - reg = el2_reg->mapping; > - } > - } else { > - /* EL1 register can't be on the CPU if the guest is in vEL2. */ > - if (unlikely(is_hyp_ctxt(vcpu))) > - goto immediate_write; > - } > + if (!vcpu_el2_e2h_is_set(vcpu) && > + el2_reg->translate) > + val = el2_reg->translate(val); > > - /* > - * System registers listed in the switch are not restored on every > - * entry to the guest but are only restored on vcpu_load. > - * > - * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but > - * should never be listed below, because the the MPIDR should only be > - * set once, before running the VCPU, and never changed later. > - */ > - switch (reg) { > - case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); return; > - case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); return; > - case ACTLR_EL1: write_sysreg_s(val, SYS_ACTLR_EL1); return; > - case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); return; > - case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); return; > - case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); return; > - case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); return; > - case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); return; > - case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); return; > - case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); return; > - case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); return; > - case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); return; > - case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); return; > - case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12); return; > - case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); return; > - case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); return; > - case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); return; > - case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); return; > - case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); return; > - case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); return; > - case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); return; > - case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); return; > - case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); return; > - case SP_EL2: write_sysreg(val, sp_el1); return; > - case ELR_EL2: write_sysreg_el1(val, SYS_ELR); return; > + /* Redirect this to the EL1 version of the register. */ > + reg = el2_reg->mapping; > + WARN_ON(!__vcpu_write_sys_reg_to_cpu(val, reg)); > + return; > } > > -immediate_write: > + /* EL1 register can't be on the CPU if the guest is in vEL2. */ > + if (unlikely(is_hyp_ctxt(vcpu))) > + goto memory_write; > + > + if (__vcpu_write_sys_reg_to_cpu(val, reg)) > + return; > + > +memory_write: > __vcpu_sys_reg(vcpu, reg) = val; > } >