On Tue, 05 Dec 2023 16:48:13 +0000, Mark Brown <broonie@xxxxxxxxxx> wrote: > > FEAT_FPMR introduces a new system register FPMR which allows configuration > of floating point behaviour, currently for FP8 specific features. Allow use > of this in guests, disabling the trap while guests are running and saving > and restoring the value along with the rest of the floating point state. > Since FPMR is stored immediately after the main floating point state we > share it with the hypervisor by adjusting the size of the shared region. > > Access to FPMR is covered by both a register specific trap HCRX_EL2.EnFPM > and the overall floating point access trap so we just unconditionally > enable the FPMR specific trap and rely on the floating point access trap to > detect guest floating point usage. > > Signed-off-by: Mark Brown <broonie@xxxxxxxxxx> > --- > arch/arm64/include/asm/kvm_arm.h | 2 +- > arch/arm64/include/asm/kvm_host.h | 4 +++- > arch/arm64/kvm/emulate-nested.c | 9 +++++++++ > arch/arm64/kvm/fpsimd.c | 20 +++++++++++++++++--- > arch/arm64/kvm/hyp/include/hyp/switch.h | 7 ++++++- > arch/arm64/kvm/sys_regs.c | 11 +++++++++++ > 6 files changed, 47 insertions(+), 6 deletions(-) > > diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h > index 9f9239d86900..95f3b44e7c3a 100644 > --- a/arch/arm64/include/asm/kvm_arm.h > +++ b/arch/arm64/include/asm/kvm_arm.h > @@ -103,7 +103,7 @@ > #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) > > #define HCRX_GUEST_FLAGS \ > - (HCRX_EL2_SMPME | HCRX_EL2_TCR2En | \ > + (HCRX_EL2_SMPME | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM | \ We really should start making all of these things conditional. See below. > (cpus_have_final_cap(ARM64_HAS_MOPS) ? 
(HCRX_EL2_MSCEn | HCRX_EL2_MCE2) : 0)) > #define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM) > > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h > index f8d98985a39c..9885adff06fa 100644 > --- a/arch/arm64/include/asm/kvm_host.h > +++ b/arch/arm64/include/asm/kvm_host.h > @@ -391,6 +391,8 @@ enum vcpu_sysreg { > CNTP_CVAL_EL0, > CNTP_CTL_EL0, > > + FPMR, > + > /* Memory Tagging Extension registers */ > RGSR_EL1, /* Random Allocation Tag Seed Register */ > GCR_EL1, /* Tag Control Register */ > @@ -517,7 +519,6 @@ struct kvm_vcpu_arch { > enum fp_type fp_type; > unsigned int sve_max_vl; > u64 svcr; > - u64 fpmr; Why do this change here? Why isn't done like that the first place? > > /* Stage 2 paging state used by the hardware on next switch */ > struct kvm_s2_mmu *hw_mmu; > @@ -576,6 +577,7 @@ struct kvm_vcpu_arch { > struct kvm_guest_debug_arch external_debug_state; > > struct user_fpsimd_state *host_fpsimd_state; /* hyp VA */ > + u64 *host_fpmr; /* hyp VA */ > struct task_struct *parent_task; > > struct { > diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c > index 06185216a297..802e5cde696f 100644 > --- a/arch/arm64/kvm/emulate-nested.c > +++ b/arch/arm64/kvm/emulate-nested.c > @@ -67,6 +67,8 @@ enum cgt_group_id { > CGT_HCR_TTLBIS, > CGT_HCR_TTLBOS, > > + CGT_HCRX_EnFPM, > + > CGT_MDCR_TPMCR, > CGT_MDCR_TPM, > CGT_MDCR_TDE, > @@ -279,6 +281,12 @@ static const struct trap_bits coarse_trap_bits[] = { > .mask = HCR_TTLBOS, > .behaviour = BEHAVE_FORWARD_ANY, > }, > + [CGT_HCRX_EnFPM] = { > + .index = HCRX_EL2, > + .value = HCRX_EL2_EnFPM, > + .mask = HCRX_EL2_EnFPM, > + .behaviour = BEHAVE_FORWARD_ANY, This looks wrong. HCRX_EL2.EnFPM is an enable bit. 
> + }, > [CGT_MDCR_TPMCR] = { > .index = MDCR_EL2, > .value = MDCR_EL2_TPMCR, > @@ -478,6 +486,7 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = { > SR_TRAP(SYS_AIDR_EL1, CGT_HCR_TID1), > SR_TRAP(SYS_SMIDR_EL1, CGT_HCR_TID1), > SR_TRAP(SYS_CTR_EL0, CGT_HCR_TID2), > + SR_TRAP(SYS_FPMR, CGT_HCRX_EnFPM), > SR_TRAP(SYS_CCSIDR_EL1, CGT_HCR_TID2_TID4), > SR_TRAP(SYS_CCSIDR2_EL1, CGT_HCR_TID2_TID4), > SR_TRAP(SYS_CLIDR_EL1, CGT_HCR_TID2_TID4), > diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c > index e3e611e30e91..dee078625d0d 100644 > --- a/arch/arm64/kvm/fpsimd.c > +++ b/arch/arm64/kvm/fpsimd.c > @@ -14,6 +14,16 @@ > #include <asm/kvm_mmu.h> > #include <asm/sysreg.h> > > +static void *fpsimd_share_end(struct user_fpsimd_state *fpsimd) > +{ > + void *share_end = fpsimd + 1; > + > + if (cpus_have_final_cap(ARM64_HAS_FPMR)) > + share_end += sizeof(u64); > + > + return share_end; > +} This is horrible. Why can't you just have a new structure wrapping both user_fpsimd_state and fpmr? This is going to break in subtle ways, just like the SVE/SME stuff. > + > void kvm_vcpu_unshare_task_fp(struct kvm_vcpu *vcpu) > { > struct task_struct *p = vcpu->arch.parent_task; > @@ -23,7 +33,7 @@ void kvm_vcpu_unshare_task_fp(struct kvm_vcpu *vcpu) > return; > > fpsimd = &p->thread.uw.fpsimd_state; > - kvm_unshare_hyp(fpsimd, fpsimd + 1); > + kvm_unshare_hyp(fpsimd, fpsimd_share_end(fpsimd)); > put_task_struct(p); > } > > @@ -45,11 +55,15 @@ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu) > kvm_vcpu_unshare_task_fp(vcpu); > > /* Make sure the host task fpsimd state is visible to hyp: */ > - ret = kvm_share_hyp(fpsimd, fpsimd + 1); > + ret = kvm_share_hyp(fpsimd, fpsimd_share_end(fpsimd)); > if (ret) > return ret; > > vcpu->arch.host_fpsimd_state = kern_hyp_va(fpsimd); > + if (cpus_have_final_cap(ARM64_HAS_FPMR)) { > + WARN_ON_ONCE(¤t->thread.fpmr + 1 != fpsimd_share_end(fpsimd)); How can this happen? 
> + vcpu->arch.host_fpmr = kern_hyp_va(&current->thread.fpmr); > + } We really need to stop piling the save/restore of stuff that isn't advertised to the guest. M. -- Without deviation from the norm, progress is not possible.