Expose the availability of the BFloat16 (BF16) format support in the CPUs. BF16 is a new 16-bit floating point format different from the half precision format defined by the IEEE-754-2008. BF16 extensions add support for new instructions for both FP/SIMD and SVE. Advertise these features individually to the userspace via ELF HWCAP. Cc: Catalin Marinas <catalin.marinas@xxxxxxx> Cc: Will Deacon <will@xxxxxxxxxx> Cc: Mark Rutland <mark.rutland@xxxxxxx> Cc: Suzuki K Poulose <suzuki.poulose@xxxxxxx> Cc: linux-doc@xxxxxxxxxxxxxxx Cc: linux-kernel@xxxxxxxxxxxxxxx Signed-off-by: Anshuman Khandual <anshuman.khandual@xxxxxxx> --- Documentation/arm64/elf_hwcaps.rst | 7 +++++++ arch/arm64/include/asm/hwcap.h | 2 ++ arch/arm64/include/asm/sysreg.h | 4 ++++ arch/arm64/include/uapi/asm/hwcap.h | 2 ++ arch/arm64/kernel/cpufeature.c | 5 +++++ arch/arm64/kernel/cpuinfo.c | 2 ++ 6 files changed, 22 insertions(+) diff --git a/Documentation/arm64/elf_hwcaps.rst b/Documentation/arm64/elf_hwcaps.rst index 7fa3d21..776b2fe 100644 --- a/Documentation/arm64/elf_hwcaps.rst +++ b/Documentation/arm64/elf_hwcaps.rst @@ -204,6 +204,13 @@ HWCAP2_FRINT Functionality implied by ID_AA64ISAR1_EL1.FRINTTS == 0b0001. +HWCAP2_BF16 + + Functionality implied by ID_AA64ISAR1_EL1.BF16 == 0b0001. + +HWCAP2_SVEBF16 + + Functionality implied by ID_AA64ZFR0_EL1.BF16 == 0b0001. 4. Unused AT_HWCAP bits ----------------------- diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 3d2f247..bd8bf48 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -86,6 +86,8 @@ #define KERNEL_HWCAP_SVESM4 __khwcap2_feature(SVESM4) #define KERNEL_HWCAP_FLAGM2 __khwcap2_feature(FLAGM2) #define KERNEL_HWCAP_FRINT __khwcap2_feature(FRINT) +#define KERNEL_HWCAP_BF16 __khwcap2_feature(BF16) +#define KERNEL_HWCAP_SVEBF16 __khwcap2_feature(SVEBF16) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 6e919fa..6db3a9b 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -553,6 +553,7 @@ #define ID_AA64ISAR0_AES_SHIFT 4 /* id_aa64isar1 */ +#define ID_AA64ISAR1_BF16_SHIFT 44 #define ID_AA64ISAR1_SB_SHIFT 36 #define ID_AA64ISAR1_FRINTTS_SHIFT 32 #define ID_AA64ISAR1_GPI_SHIFT 28 @@ -564,6 +565,7 @@ #define ID_AA64ISAR1_APA_SHIFT 4 #define ID_AA64ISAR1_DPB_SHIFT 0 +#define ID_AA64ISAR1_BF16 0x1 #define ID_AA64ISAR1_APA_NI 0x0 #define ID_AA64ISAR1_APA_ARCHITECTED 0x1 #define ID_AA64ISAR1_API_NI 0x0 @@ -607,12 +609,14 @@ /* id_aa64zfr0 */ #define ID_AA64ZFR0_SM4_SHIFT 40 #define ID_AA64ZFR0_SHA3_SHIFT 32 +#define ID_AA64ZFR0_BF16_SHIFT 20 #define ID_AA64ZFR0_BITPERM_SHIFT 16 #define ID_AA64ZFR0_AES_SHIFT 4 #define ID_AA64ZFR0_SVEVER_SHIFT 0 #define ID_AA64ZFR0_SM4 0x1 #define ID_AA64ZFR0_SHA3 0x1 +#define ID_AA64ZFR0_BF16 0x1 #define ID_AA64ZFR0_BITPERM 0x1 #define ID_AA64ZFR0_AES 0x1 #define ID_AA64ZFR0_AES_PMULL 0x2 diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index a1e72886..095090c 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -65,5 +65,7 @@ #define HWCAP2_SVESM4 (1 << 6) #define HWCAP2_FLAGM2 (1 << 7) #define HWCAP2_FRINT (1 << 8) +#define HWCAP2_BF16 (1 << 9) +#define HWCAP2_SVEBF16 (1 << 10) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 04cf64e..f344cea 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -149,6 +149,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_APA_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_DPB_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_BF16_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -186,6 +187,8 @@ static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = { FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_AES_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SVEVER_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_BF16_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -1652,6 +1655,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FRINTTS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FRINT), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB), HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_BF16_SHIFT, FTR_UNSIGNED, ID_AA64ISAR1_BF16, CAP_HWCAP, KERNEL_HWCAP_BF16), #ifdef CONFIG_ARM64_SVE HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, KERNEL_HWCAP_SVE), HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SVEVER_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SVEVER_SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2), @@ -1660,6 +1664,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BITPERM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_BITPERM, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM), HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SHA3_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SHA3, CAP_HWCAP, KERNEL_HWCAP_SVESHA3), HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SM4_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SM4, CAP_HWCAP, KERNEL_HWCAP_SVESM4), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BF16_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_BF16, CAP_HWCAP, KERNEL_HWCAP_SVEBF16), #endif HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, KERNEL_HWCAP_SSBS), #ifdef CONFIG_ARM64_PTR_AUTH diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 56bba74..10121f5 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -84,6 +84,8 @@ static const char *const hwcap_str[] = { "svesm4", "flagm2", "frint", + "bf16", + "svebf16", NULL }; -- 2.7.4