On Mon, Nov 15, 2021 at 03:28:11PM +0000, Mark Brown wrote: > diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h > index 7b23b16f21ce..6f8ca04b6566 100644 > --- a/arch/arm64/include/uapi/asm/hwcap.h > +++ b/arch/arm64/include/uapi/asm/hwcap.h > @@ -76,5 +76,13 @@ > #define HWCAP2_BTI (1 << 17) > #define HWCAP2_MTE (1 << 18) > #define HWCAP2_ECV (1 << 19) > +#define HWCAP2_SME (1 << 20) > +#define HWCAP2_SME_I16I64 (1 << 21) > +#define HWCAP2_SME_F64F64 (1 << 22) > +#define HWCAP2_SME_I8I32 (1 << 23) > +#define HWCAP2_SME_F16F32 (1 << 24) > +#define HWCAP2_SME_B16F32 (1 << 25) > +#define HWCAP2_SME_F32F32 (1 << 26) > +#define HWCAP2_SME_FA64 (1 << 27) At this pace we'll need HWCAP3 pretty soon (since we only allocated 32 bits in each). I wonder whether we could instead not bother at all and just provide user-space emulation for ID_AA64SMFR0_EL1. > #endif /* _UAPI__ASM_HWCAP_H */ > diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c > index 81824c7ea74f..3cf60819c354 100644 > --- a/arch/arm64/kernel/cpufeature.c > +++ b/arch/arm64/kernel/cpufeature.c > @@ -246,6 +246,7 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { > }; > > static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { > + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SME_SHIFT, 4, 0), > ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_MPAMFRAC_SHIFT, 4, 0), > ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_RASFRAC_SHIFT, 4, 0), > ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_MTE), > @@ -278,6 +279,24 @@ static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = { > ARM64_FTR_END, > }; > > +static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = { > + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), > + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_FA64_SHIFT, 1, 0), > + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), > + FTR_STRICT, FTR_EXACT, 
ID_AA64SMFR0_I16I64_SHIFT, 4, 0), > + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), > + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F64F64_SHIFT, 1, 0), > + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), > + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_I8I32_SHIFT, 4, 0), > + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), > + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F16F32_SHIFT, 1, 0), > + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), > + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_B16F32_SHIFT, 1, 0), > + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), > + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F32F32_SHIFT, 1, 0), > + ARM64_FTR_END, > +}; > + > static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { > ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_ECV_SHIFT, 4, 0), > ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_FGT_SHIFT, 4, 0), > @@ -628,6 +647,7 @@ static const struct __ftr_reg_entry { > ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1, > &id_aa64pfr1_override), > ARM64_FTR_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0), > + ARM64_FTR_REG(SYS_ID_AA64SMFR0_EL1, ftr_id_aa64smfr0), > > /* Op1 = 0, CRn = 0, CRm = 5 */ > ARM64_FTR_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0), > @@ -939,6 +959,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) > init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0); > init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1); > init_cpu_ftr_reg(SYS_ID_AA64ZFR0_EL1, info->reg_id_aa64zfr0); > + init_cpu_ftr_reg(SYS_ID_AA64SMFR0_EL1, info->reg_id_aa64smfr0); > > if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) > init_32bit_cpu_features(&info->aarch32); > @@ -2370,6 +2391,30 @@ static const struct arm64_cpu_capabilities arm64_features[] = { > .matches = has_cpuid_feature, > .min_field_value = 1, > }, > +#ifdef CONFIG_ARM64_SME > + { > + .desc = "Scalable Matrix Extension", > + .type = ARM64_CPUCAP_SYSTEM_FEATURE, > + .capability = 
ARM64_SME, > + .sys_reg = SYS_ID_AA64PFR1_EL1, > + .sign = FTR_UNSIGNED, > + .field_pos = ID_AA64PFR1_SME_SHIFT, > + .min_field_value = ID_AA64PFR1_SME, > + .matches = has_cpuid_feature, > + .cpu_enable = sme_kernel_enable, > + }, > + { > + .desc = "FA64", > + .type = ARM64_CPUCAP_SYSTEM_FEATURE, > + .capability = ARM64_SME_FA64, > + .sys_reg = SYS_ID_AA64SMFR0_EL1, > + .sign = FTR_UNSIGNED, > + .field_pos = ID_AA64SMFR0_FA64_SHIFT, > + .min_field_value = ID_AA64SMFR0_FA64, > + .matches = has_feature_flag, > + .cpu_enable = fa64_kernel_enable, > + }, I'll comment here rather than on the patch introducing has_feature_flag(): an alternative would be to add a .field_width option and, in feature_matches(), use cpuid_feature_extract_field_width() directly. All the arm64_ftr_bits entries already have a width, so we could just generalise this to arm64_cpu_capabilities. -- Catalin