On Tue, 25 Feb 2025 00:53:57 +0000, Oliver Upton <oliver.upton@xxxxxxxxx> wrote: > > commit 90807748ca3a ("KVM: arm64: Hide SME system registers from > guests") added trap handling for SMIDR_EL1, treating it as UNDEFINED as > KVM does not support SME. This is right for the most part, however KVM > needs to set HCR_EL2.TID1 to _actually_ trap the register. > > Unfortunately, this comes with some collateral damage as TID1 forces > REVIDR_EL1 and AIDR_EL1 to trap as well. KVM has long treated these > registers as "invariant" which is an awful term for the following: > > - Userspace sees the boot CPU values on all vCPUs > > - The guest sees the hardware values of the CPU on which a vCPU is > scheduled > > Keep the plates spinning by adding trap handling for the affected > registers and repaint all of the "invariant" crud into terms of > identifying an implementation. Yes, at this point we only need to > set TID1 on SME hardware, but REVIDR_EL1 and AIDR_EL1 are about to > become mutable anyway. > > Cc: Mark Brown <broonie@xxxxxxxxxx> > Cc: stable@xxxxxxxxxxxxxxx > Fixes: 90807748ca3a ("KVM: arm64: Hide SME system registers from guests") > Signed-off-by: Oliver Upton <oliver.upton@xxxxxxxxx> > --- > arch/arm64/include/asm/kvm_arm.h | 4 +- > arch/arm64/kvm/sys_regs.c | 175 ++++++++++++++++--------------- > 2 files changed, 94 insertions(+), 85 deletions(-) > > diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h > index 8d94a6c0ed5c..b01c01e55de5 100644 > --- a/arch/arm64/include/asm/kvm_arm.h > +++ b/arch/arm64/include/asm/kvm_arm.h > @@ -92,12 +92,12 @@ > * SWIO: Turn set/way invalidates into set/way clean+invalidate > * PTW: Take a stage2 fault if a stage1 walk steps in device memory > * TID3: Trap EL1 reads of group 3 ID registers > - * TID2: Trap CTR_EL0, CCSIDR2_EL1, CLIDR_EL1, and CSSELR_EL1 > + * TID1: Trap REVIDR_EL1, AIDR_EL1, and SMIDR_EL1 > */ > #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \ > HCR_BSU_IS | HCR_FB | HCR_TACR | \ > HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \ > - HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3) > + HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3 | HCR_TID1) > #define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA) > #define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC) > #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) > diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c > index f6cd1ea7fb55..f25a157622e3 100644 > --- a/arch/arm64/kvm/sys_regs.c > +++ b/arch/arm64/kvm/sys_regs.c > @@ -2483,6 +2483,93 @@ static bool access_mdcr(struct kvm_vcpu *vcpu, > return true; > } > > +/* > + * For historical (ahem ABI) reasons, KVM treats MIDR_EL1, REVIDR_EL1, and > + * AIDR_EL1 as "invariant" registers, meaning userspace cannot change them. For consistency, using the past tense would make a lot more sense. > + * The values made visible to userspace were the register values of the boot > + * CPU. > + * > + * At the same time, reads from these registers at EL1 previously were not > + * trapped, allowing the guest to read the actual hardware value. On big-little > + * machines, this means the VM can see different values depending on where a > + * given vCPU got scheduled. > + * > + * These registers are now trapped as collateral damage from SME, and what > + * follows attempts to give a user / guest view consistent with the existing > + * ABI. > + */ > +static bool access_imp_id_reg(struct kvm_vcpu *vcpu, > + struct sys_reg_params *p, > + const struct sys_reg_desc *r) > +{ > + if (p->is_write) > + return write_to_read_only(vcpu, p, r); > + > + switch (reg_to_encoding(r)) { > + case SYS_REVIDR_EL1: > + p->regval = read_sysreg(revidr_el1); > + break; > + case SYS_AIDR_EL1: > + p->regval = read_sysreg(aidr_el1); > + break; > + default: > + WARN_ON_ONCE(1); > + } > + > + return true; > +} > + > +static u64 __ro_after_init boot_cpu_midr_val; > +static u64 __ro_after_init boot_cpu_revidr_val; > +static u64 __ro_after_init boot_cpu_aidr_val; > + > +static void init_imp_id_regs(void) > +{ > + boot_cpu_midr_val = read_sysreg(midr_el1); > + boot_cpu_revidr_val = read_sysreg(revidr_el1); > + boot_cpu_aidr_val = read_sysreg(aidr_el1); > +} > + > +static int get_imp_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, > + u64 *val) > +{ > + switch (reg_to_encoding(r)) { > + case SYS_MIDR_EL1: > + *val = boot_cpu_midr_val; > + break; > + case SYS_REVIDR_EL1: > + *val = boot_cpu_revidr_val; > + break; > + case SYS_AIDR_EL1: > + *val = boot_cpu_aidr_val; > + break; > + default: > + WARN_ON_ONCE(1); > + return -EINVAL; > + } > + > + return 0; > +} > + > +static int set_imp_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, > + u64 val) > +{ > + u64 expected; > + int ret; > + > + ret = get_imp_id_reg(vcpu, r, &expected); > + if (ret) > + return ret; > + > + return (expected == val) ? 0 : -EINVAL; > +} > + > +#define IMPLEMENTATION_ID(reg) { \ > + SYS_DESC(SYS_##reg), \ > + .access = access_imp_id_reg, \ > + .get_user = get_imp_id_reg, \ > + .set_user = set_imp_id_reg, \ > +} > > /* > * Architected system registers. > @@ -2532,7 +2619,9 @@ static const struct sys_reg_desc sys_reg_descs[] = { > > { SYS_DESC(SYS_DBGVCR32_EL2), undef_access, reset_val, DBGVCR32_EL2, 0 }, > > + IMPLEMENTATION_ID(MIDR_EL1), > { SYS_DESC(SYS_MPIDR_EL1), NULL, reset_mpidr, MPIDR_EL1 }, > + IMPLEMENTATION_ID(REVIDR_EL1), > > /* > * ID regs: all ID_SANITISED() entries here must have corresponding > @@ -2804,6 +2893,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { > .set_user = set_clidr, .val = ~CLIDR_EL1_RES0 }, > { SYS_DESC(SYS_CCSIDR2_EL1), undef_access }, > { SYS_DESC(SYS_SMIDR_EL1), undef_access }, > + IMPLEMENTATION_ID(AIDR_EL1), > { SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 }, > ID_FILTERED(CTR_EL0, ctr_el0, > CTR_EL0_DIC_MASK | I don't think this is enough. You also need to augment the cp15[] table to handle trapping for the 32bit versions of REVIDR/AIDR. Thanks, M. -- Without deviation from the norm, progress is not possible.