The two point of unification (PoU) cache maintenance operations, 'DC CVAU'
and 'IC IVAU', are optional for implementers under the ARMv8 architecture
specification. This patch parses the updated CTR_EL0 register definition
and adds the required changes to skip the PoU operations when the hardware
reports CTR_EL0.IDC and/or CTR_EL0.DIC.

CTR_EL0.DIC: Instruction cache invalidation requirements for
instruction to data coherence. The meaning of bit[29]:
  0: Instruction cache invalidation to the point of unification is
     required for instruction to data coherence.
  1: Instruction cache invalidation to the point of unification is not
     required for instruction to data coherence.

CTR_EL0.IDC: Data cache clean requirements for instruction to data
coherence. The meaning of bit[28]:
  0: Data cache clean to the point of unification is required for
     instruction to data coherence, unless CLIDR_EL1.LoC == 0b000 or
     (CLIDR_EL1.LoUIS == 0b000 && CLIDR_EL1.LoUU == 0b000).
  1: Data cache clean to the point of unification is not required for
     instruction to data coherence.

Signed-off-by: Philip Elcan <pelcan@xxxxxxxxxxxxxx>
Signed-off-by: Shanker Donthineni <shankerd@xxxxxxxxxxxxxx>
---
 arch/arm64/include/asm/assembler.h | 48 ++++++++++++++++++++++++--------------
 arch/arm64/include/asm/cache.h     |  2 ++
 arch/arm64/kernel/cpufeature.c     |  2 ++
 arch/arm64/mm/cache.S              | 26 ++++++++++++++-------
 4 files changed, 51 insertions(+), 27 deletions(-)

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 3c78835..9eaa948 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -30,6 +30,7 @@
 #include <asm/pgtable-hwdef.h>
 #include <asm/ptrace.h>
 #include <asm/thread_info.h>
+#include <asm/cache.h>
 
 	.macro save_and_disable_daif, flags
 	mrs	\flags, daif
@@ -334,9 +335,9 @@
  * raw_dcache_line_size - get the minimum D-cache line size on this CPU
  * from the CTR register.
  */
-	.macro	raw_dcache_line_size, reg, tmp
-	mrs	\tmp, ctr_el0			// read CTR
-	ubfm	\tmp, \tmp, #16, #19		// cache line size encoding
+	.macro	raw_dcache_line_size, reg, tmp, ctr
+	mrs	\ctr, ctr_el0			// read CTR
+	ubfm	\tmp, \ctr, #16, #19		// cache line size encoding
 	mov	\reg, #4			// bytes per word
 	lsl	\reg, \reg, \tmp		// actual cache line size
 	.endm
@@ -344,9 +345,9 @@
 /*
  * dcache_line_size - get the safe D-cache line size across all CPUs
  */
-	.macro	dcache_line_size, reg, tmp
-	read_ctr	\tmp
-	ubfm	\tmp, \tmp, #16, #19		// cache line size encoding
+	.macro	dcache_line_size, reg, tmp, ctr
+	read_ctr	\ctr
+	ubfm	\tmp, \ctr, #16, #19		// cache line size encoding
 	mov	\reg, #4			// bytes per word
 	lsl	\reg, \reg, \tmp		// actual cache line size
 	.endm
@@ -355,9 +356,9 @@
  * raw_icache_line_size - get the minimum I-cache line size on this CPU
  * from the CTR register.
  */
-	.macro	raw_icache_line_size, reg, tmp
-	mrs	\tmp, ctr_el0			// read CTR
-	and	\tmp, \tmp, #0xf		// cache line size encoding
+	.macro	raw_icache_line_size, reg, tmp, ctr
+	mrs	\ctr, ctr_el0			// read CTR
+	and	\tmp, \ctr, #0xf		// cache line size encoding
 	mov	\reg, #4			// bytes per word
 	lsl	\reg, \reg, \tmp		// actual cache line size
 	.endm
@@ -365,9 +366,9 @@
 /*
  * icache_line_size - get the safe I-cache line size across all CPUs
  */
-	.macro	icache_line_size, reg, tmp
-	read_ctr	\tmp
-	and	\tmp, \tmp, #0xf		// cache line size encoding
+	.macro	icache_line_size, reg, tmp, ctr
+	read_ctr	\ctr
+	and	\tmp, \ctr, #0xf		// cache line size encoding
 	mov	\reg, #4			// bytes per word
 	lsl	\reg, \reg, \tmp		// actual cache line size
 	.endm
@@ -408,13 +409,21 @@
  *	size:		size of the region
  *	Corrupts:	kaddr, size, tmp1, tmp2
  */
-	.macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2
-	dcache_line_size \tmp1, \tmp2
+	.macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2, tmp3
+	dcache_line_size \tmp1, \tmp2, \tmp3
 	add	\size, \kaddr, \size
 	sub	\tmp2, \tmp1, #1
 	bic	\kaddr, \kaddr, \tmp2
 9998:
-	.if	(\op == cvau || \op == cvac)
+	.if	(\op == cvau)
+alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
+	tbnz	\tmp3, #CTR_IDC_SHIFT, 9997f
+	dc	cvau, \kaddr
+alternative_else
+	dc	civac, \kaddr
+	nop
+alternative_endif
+	.elseif	(\op == cvac)
 alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
 	dc	\op, \kaddr
 alternative_else
@@ -433,6 +442,7 @@
 	cmp	\kaddr, \size
 	b.lo	9998b
 	dsb	\domain
+9997:
 	.endm
 
 /*
@@ -441,10 +451,11 @@
  *
  *	start, end:	virtual addresses describing the region
  *	label:		A label to branch to on user fault.
- *	Corrupts:	tmp1, tmp2
+ *	Corrupts:	tmp1, tmp2, tmp3
  */
-	.macro invalidate_icache_by_line start, end, tmp1, tmp2, label
-	icache_line_size \tmp1, \tmp2
+	.macro invalidate_icache_by_line start, end, tmp1, tmp2, tmp3, label
+	icache_line_size \tmp1, \tmp2, \tmp3
+	tbnz	\tmp3, #CTR_DIC_SHIFT, 9996f
 	sub	\tmp2, \tmp1, #1
 	bic	\tmp2, \start, \tmp2
 9997:
@@ -454,6 +465,7 @@
 	b.lo	9997b
 	dsb	ish
 	isb
+9996:
 	.endm
 
 /*
diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index ea9bb4e..aea533b 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -22,6 +22,8 @@
 #define CTR_L1IP_MASK		3
 #define CTR_CWG_SHIFT		24
 #define CTR_CWG_MASK		15
+#define CTR_IDC_SHIFT		28
+#define CTR_DIC_SHIFT		29
 
 #define CTR_L1IP(ctr)		(((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK)
 
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 29b1f87..f42bb5a 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -200,6 +200,8 @@ static int __init register_cpu_hwcaps_dumper(void)
 
 static const struct arm64_ftr_bits ftr_ctr[] = {
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1),	/* RAO */
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_DIC_SHIFT, 1, 0),	/* DIC */
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_IDC_SHIFT, 1, 0),	/* IDC */
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0),	/* CWG */
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0),	/* ERG */
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1),	/* DminLine */
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 758bde7..5764af8 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -24,6 +24,7 @@
 #include <asm/cpufeature.h>
 #include <asm/alternative.h>
 #include <asm/asm-uaccess.h>
+#include <asm/cache.h>
 
 /*
  * flush_icache_range(start,end)
@@ -50,7 +51,12 @@ ENTRY(flush_icache_range)
  */
 ENTRY(__flush_cache_user_range)
 	uaccess_ttbr0_enable x2, x3, x4
-	dcache_line_size x2, x3
+	dcache_line_size x2, x3, x4
+alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
+	tbnz	x4, #CTR_IDC_SHIFT, 8f
+alternative_else
+	nop
+alternative_endif
 	sub	x3, x2, #1
 	bic	x4, x0, x3
 1:
@@ -60,7 +66,9 @@ user_alt 9f, "dc cvau, x4",  "dc civac, x4",  ARM64_WORKAROUND_CLEAN_CACHE
 	b.lo	1b
 	dsb	ish
 
-	invalidate_icache_by_line x0, x1, x2, x3, 9f
+8:
+	invalidate_icache_by_line x0, x1, x2, x3, x4, 9f
+
 	mov	x0, #0
 1:
 	uaccess_ttbr0_disable x1, x2
@@ -82,7 +90,7 @@ ENDPROC(__flush_cache_user_range)
 ENTRY(invalidate_icache_range)
 	uaccess_ttbr0_enable x2, x3, x4
 
-	invalidate_icache_by_line x0, x1, x2, x3, 2f
+	invalidate_icache_by_line x0, x1, x2, x3, x4, 2f
 	mov	x0, xzr
 1:
 	uaccess_ttbr0_disable x1, x2
@@ -102,7 +110,7 @@ ENDPROC(invalidate_icache_range)
  *	- size    - size in question
  */
 ENTRY(__flush_dcache_area)
-	dcache_by_line_op civac, sy, x0, x1, x2, x3
+	dcache_by_line_op civac, sy, x0, x1, x2, x3, x4
 	ret
 ENDPIPROC(__flush_dcache_area)
 
@@ -116,7 +124,7 @@ ENDPIPROC(__flush_dcache_area)
  *	- size    - size in question
  */
 ENTRY(__clean_dcache_area_pou)
-	dcache_by_line_op cvau, ish, x0, x1, x2, x3
+	dcache_by_line_op cvau, ish, x0, x1, x2, x3, x4
 	ret
 ENDPROC(__clean_dcache_area_pou)
 
@@ -140,7 +148,7 @@ ENTRY(__inval_dcache_area)
  */
 __dma_inv_area:
 	add	x1, x1, x0
-	dcache_line_size x2, x3
+	dcache_line_size x2, x3, x4
 	sub	x3, x2, #1
 	tst	x1, x3				// end cache line aligned?
 	bic	x1, x1, x3
@@ -178,7 +186,7 @@ ENTRY(__clean_dcache_area_poc)
  *	- size    - size in question
  */
 __dma_clean_area:
-	dcache_by_line_op cvac, sy, x0, x1, x2, x3
+	dcache_by_line_op cvac, sy, x0, x1, x2, x3, x4
 	ret
 ENDPIPROC(__clean_dcache_area_poc)
 ENDPROC(__dma_clean_area)
@@ -193,7 +201,7 @@ ENDPROC(__dma_clean_area)
  *	- size    - size in question
  */
 ENTRY(__clean_dcache_area_pop)
-	dcache_by_line_op cvap, sy, x0, x1, x2, x3
+	dcache_by_line_op cvap, sy, x0, x1, x2, x3, x4
 	ret
 ENDPIPROC(__clean_dcache_area_pop)
 
@@ -206,7 +214,7 @@ ENDPIPROC(__clean_dcache_area_pop)
  *	- size    - size in question
  */
 ENTRY(__dma_flush_area)
-	dcache_by_line_op civac, sy, x0, x1, x2, x3
+	dcache_by_line_op civac, sy, x0, x1, x2, x3, x4
 	ret
 ENDPIPROC(__dma_flush_area)
 
-- 
Qualcomm Datacenter Technologies, Inc. on behalf of the Qualcomm Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.
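
P.S. For anyone wanting to see what their hardware reports, below is a
minimal userspace sketch (illustrative only, not part of the patch; the
file name ctr_check.c is made up). It assumes an arm64 Linux host, where
the kernel traps and emulates userspace MRS reads of CTR_EL0:

/* ctr_check.c: print the CTR_EL0.IDC/DIC coherence requirements */
#include <stdint.h>
#include <stdio.h>

#define CTR_IDC_SHIFT	28	/* 1: DC CVAU not needed for I/D coherence */
#define CTR_DIC_SHIFT	29	/* 1: IC IVAU not needed for I/D coherence */

int main(void)
{
	uint64_t ctr;

	/* Trapped and emulated by the kernel when executed at EL0 */
	asm volatile("mrs %0, ctr_el0" : "=r" (ctr));

	printf("CTR_EL0.IDC = %u: DC CVAU to PoU %s\n",
	       (unsigned)((ctr >> CTR_IDC_SHIFT) & 1),
	       (ctr & (1UL << CTR_IDC_SHIFT)) ? "not required" : "required");
	printf("CTR_EL0.DIC = %u: IC IVAU to PoU %s\n",
	       (unsigned)((ctr >> CTR_DIC_SHIFT) & 1),
	       (ctr & (1UL << CTR_DIC_SHIFT)) ? "not required" : "required");

	return 0;
}

Built with 'gcc -o ctr_check ctr_check.c' on an arm64 machine, a CPU
that sets both bits prints "not required" for both operations, which is
exactly the case the tbnz checks above short-circuit.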