From: Lad Prabhakar <prabhakar.mahadev-lad.rj@xxxxxxxxxxxxxx> Currently, selecting which CMOs to use on a given platform is done using and ALTERNATIVE_X() macro. This was manageable when there were just two CMO implementations, but now that there are more and more platforms coming needing custom CMOs, the use of the ALTERNATIVE_X() macro is unmanageable. To avoid such issues this patch switches to use of function pointers instead of ALTERNATIVE_X() macro for cache management (the only drawback being performance over the previous approach). void (*clean_range)(unsigned long addr, unsigned long size); void (*inv_range)(unsigned long addr, unsigned long size); void (*flush_range)(unsigned long addr, unsigned long size); The above function pointers are provided to be overridden for platforms needing CMO. Convert ZICBOM and T-HEAD CMO to use function pointers. Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@xxxxxxxxxxxxxx> --- v6->v7 * Updated commit description * Fixed build issues when CONFIG_ERRATA_THEAD_CMO=n * Used static const struct ptr to register CMO ops * Dropped riscv_dma_noncoherent_cmo_ops * Moved ZICBOM CMO setup to setup.c v5->v6 * New patch --- arch/riscv/errata/thead/errata.c | 76 ++++++++++++++++++++++++ arch/riscv/include/asm/dma-noncoherent.h | 73 +++++++++++++++++++++++ arch/riscv/include/asm/errata_list.h | 53 ----------------- arch/riscv/kernel/setup.c | 49 ++++++++++++++- arch/riscv/mm/dma-noncoherent.c | 25 ++++++-- arch/riscv/mm/pmem.c | 6 +- 6 files changed, 221 insertions(+), 61 deletions(-) create mode 100644 arch/riscv/include/asm/dma-noncoherent.h diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c index 7e8d50ebb71a..4bb3f2baee03 100644 --- a/arch/riscv/errata/thead/errata.c +++ b/arch/riscv/errata/thead/errata.c @@ -11,10 +11,83 @@ #include <linux/uaccess.h> #include <asm/alternative.h> #include <asm/cacheflush.h> +#include <asm/dma-noncoherent.h> #include <asm/errata_list.h> #include <asm/patch.h> #include <asm/vendorid_list.h> +#ifdef CONFIG_ERRATA_THEAD_CMO +/* + * dcache.ipa rs1 (invalidate, physical address) + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | + * 0000001 01010 rs1 000 00000 0001011 + * dache.iva rs1 (invalida, virtual address) + * 0000001 00110 rs1 000 00000 0001011 + * + * dcache.cpa rs1 (clean, physical address) + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | + * 0000001 01001 rs1 000 00000 0001011 + * dcache.cva rs1 (clean, virtual address) + * 0000001 00100 rs1 000 00000 0001011 + * + * dcache.cipa rs1 (clean then invalidate, physical address) + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | + * 0000001 01011 rs1 000 00000 0001011 + * dcache.civa rs1 (... virtual address) + * 0000001 00111 rs1 000 00000 0001011 + * + * sync.s (make sure all cache operations finished) + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | + * 0000000 11001 00000 000 00000 0001011 + */ +#define THEAD_inval_A0 ".long 0x0265000b" +#define THEAD_clean_A0 ".long 0x0245000b" +#define THEAD_flush_A0 ".long 0x0275000b" +#define THEAD_SYNC_S ".long 0x0190000b" + +#define THEAD_CMO_OP(_op, _start, _size, _cachesize) \ + asm volatile("mv a0, %1\n\t" \ + "j 2f\n\t" \ + "3:\n\t" \ + THEAD_##_op##_A0 "\n\t" \ + "add a0, a0, %0\n\t" \ + "2:\n\t" \ + "bltu a0, %2, 3b\n\t" \ + THEAD_SYNC_S \ + : : "r"(_cachesize), \ + "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)), \ + "r"((unsigned long)(_start) + (_size)) \ + : "a0") + +static void thead_cmo_clean_range(unsigned long addr, unsigned long size) +{ + THEAD_CMO_OP(clean, addr, size, riscv_cbom_block_size); +} + +static void thead_cmo_flush_range(unsigned long addr, unsigned long size) +{ + THEAD_CMO_OP(flush, addr, size, riscv_cbom_block_size); +} + +static void thead_cmo_inval_range(unsigned long addr, unsigned long size) +{ + THEAD_CMO_OP(inval, addr, size, riscv_cbom_block_size); +} + +static const struct riscv_cache_ops thead_cmo_ops = { + .clean_range = &thead_cmo_clean_range, + .inv_range = &thead_cmo_inval_range, + .flush_range = &thead_cmo_flush_range, +}; + +static void thead_register_cmo_ops(void) +{ + riscv_noncoherent_register_cache_ops(&thead_cmo_ops); +} +#else +static void thead_register_cmo_ops(void) {} +#endif + static bool errata_probe_pbmt(unsigned int stage, unsigned long arch_id, unsigned long impid) { @@ -45,6 +118,9 @@ static bool errata_probe_cmo(unsigned int stage, riscv_cbom_block_size = L1_CACHE_BYTES; riscv_noncoherent_supported(); + + thead_register_cmo_ops(); + return true; } diff --git a/arch/riscv/include/asm/dma-noncoherent.h b/arch/riscv/include/asm/dma-noncoherent.h new file mode 100644 index 000000000000..fc8d16c89f01 --- /dev/null +++ b/arch/riscv/include/asm/dma-noncoherent.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 Renesas Electronics Corp. + */ + +#ifndef __ASM_DMA_NONCOHERENT_H +#define __ASM_DMA_NONCOHERENT_H + +#include <linux/dma-direct.h> + +#ifdef CONFIG_RISCV_DMA_NONCOHERENT + +/* + * struct riscv_cache_ops - Structure for CMO function pointers + * + * @clean_range: Function pointer for clean cache + * @inv_range: Function pointer for invalidate cache + * @flush_range: Function pointer for flushing the cache + */ +struct riscv_cache_ops { + void (*clean_range)(unsigned long addr, unsigned long size); + void (*inv_range)(unsigned long addr, unsigned long size); + void (*flush_range)(unsigned long addr, unsigned long size); +}; + +extern struct riscv_cache_ops noncoherent_cache_ops; + +void riscv_noncoherent_register_cache_ops(const struct riscv_cache_ops *ops); + +static inline void riscv_dma_noncoherent_clean(void *vaddr, size_t size) +{ + if (noncoherent_cache_ops.clean_range) { + unsigned long addr = (unsigned long)vaddr; + + noncoherent_cache_ops.clean_range(addr, size); + } +} + +static inline void riscv_dma_noncoherent_flush(void *vaddr, size_t size) +{ + if (noncoherent_cache_ops.flush_range) { + unsigned long addr = (unsigned long)vaddr; + + noncoherent_cache_ops.flush_range(addr, size); + } +} + +static inline void riscv_dma_noncoherent_inval(void *vaddr, size_t size) +{ + if (noncoherent_cache_ops.inv_range) { + unsigned long addr = (unsigned long)vaddr; + + noncoherent_cache_ops.inv_range(addr, size); + } +} + +static inline void riscv_dma_noncoherent_pmem_clean(void *vaddr, size_t size) +{ + riscv_dma_noncoherent_clean(vaddr, size); +} + +static inline void riscv_dma_noncoherent_pmem_inval(void *vaddr, size_t size) +{ + riscv_dma_noncoherent_inval(vaddr, size); +} +#else + +static inline void riscv_dma_noncoherent_pmem_clean(void *vaddr, size_t size) {} +static inline void riscv_dma_noncoherent_pmem_inval(void *vaddr, size_t size) {} + +#endif /* CONFIG_RISCV_DMA_NONCOHERENT */ + +#endif /* __ASM_DMA_NONCOHERENT_H */ diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h index fb1a810f3d8c..112429910ee6 100644 --- a/arch/riscv/include/asm/errata_list.h +++ b/arch/riscv/include/asm/errata_list.h @@ -89,59 +89,6 @@ asm volatile(ALTERNATIVE( \ #define ALT_THEAD_PMA(_val) #endif -/* - * dcache.ipa rs1 (invalidate, physical address) - * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | - * 0000001 01010 rs1 000 00000 0001011 - * dache.iva rs1 (invalida, virtual address) - * 0000001 00110 rs1 000 00000 0001011 - * - * dcache.cpa rs1 (clean, physical address) - * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | - * 0000001 01001 rs1 000 00000 0001011 - * dcache.cva rs1 (clean, virtual address) - * 0000001 00100 rs1 000 00000 0001011 - * - * dcache.cipa rs1 (clean then invalidate, physical address) - * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | - * 0000001 01011 rs1 000 00000 0001011 - * dcache.civa rs1 (... virtual address) - * 0000001 00111 rs1 000 00000 0001011 - * - * sync.s (make sure all cache operations finished) - * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | - * 0000000 11001 00000 000 00000 0001011 - */ -#define THEAD_inval_A0 ".long 0x0265000b" -#define THEAD_clean_A0 ".long 0x0245000b" -#define THEAD_flush_A0 ".long 0x0275000b" -#define THEAD_SYNC_S ".long 0x0190000b" - -#define ALT_CMO_OP(_op, _start, _size, _cachesize) \ -asm volatile(ALTERNATIVE_2( \ - __nops(6), \ - "mv a0, %1\n\t" \ - "j 2f\n\t" \ - "3:\n\t" \ - CBO_##_op(a0) \ - "add a0, a0, %0\n\t" \ - "2:\n\t" \ - "bltu a0, %2, 3b\n\t" \ - "nop", 0, RISCV_ISA_EXT_ZICBOM, CONFIG_RISCV_ISA_ZICBOM, \ - "mv a0, %1\n\t" \ - "j 2f\n\t" \ - "3:\n\t" \ - THEAD_##_op##_A0 "\n\t" \ - "add a0, a0, %0\n\t" \ - "2:\n\t" \ - "bltu a0, %2, 3b\n\t" \ - THEAD_SYNC_S, THEAD_VENDOR_ID, \ - ERRATA_THEAD_CMO, CONFIG_ERRATA_THEAD_CMO) \ - : : "r"(_cachesize), \ - "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)), \ - "r"((unsigned long)(_start) + (_size)) \ - : "a0") - #define THEAD_C9XX_RV_IRQ_PMU 17 #define THEAD_C9XX_CSR_SCOUNTEROF 0x5c5 diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 5d3184cbf518..b2b69d1dec23 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -24,6 +24,7 @@ #include <asm/alternative.h> #include <asm/cacheflush.h> #include <asm/cpu_ops.h> +#include <asm/dma-noncoherent.h> #include <asm/early_ioremap.h> #include <asm/pgtable.h> #include <asm/setup.h> @@ -262,6 +263,50 @@ static void __init parse_dtb(void) #endif } +#ifdef CONFIG_RISCV_ISA_ZICBOM + +#define ZICBOM_CMO_OP(_op, _start, _size, _cachesize) \ + asm volatile("mv a0, %1\n\t" \ + "j 2f\n\t" \ + "3:\n\t" \ + CBO_##_op(a0) \ + "add a0, a0, %0\n\t" \ + "2:\n\t" \ + "bltu a0, %2, 3b\n\t" \ + : : "r"(_cachesize), \ + "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)), \ + "r"((unsigned long)(_start) + (_size)) \ + : "a0") + +static void zicbom_cmo_clean_range(unsigned long addr, unsigned long size) +{ + ZICBOM_CMO_OP(clean, addr, size, riscv_cbom_block_size); +} + +static void zicbom_cmo_flush_range(unsigned long addr, unsigned long size) +{ + ZICBOM_CMO_OP(flush, addr, size, riscv_cbom_block_size); +} + +static void zicbom_cmo_inval_range(unsigned long addr, unsigned long size) +{ + ZICBOM_CMO_OP(inval, addr, size, riscv_cbom_block_size); +} + +const struct riscv_cache_ops zicbom_cmo_ops = { + .clean_range = &zicbom_cmo_clean_range, + .inv_range = &zicbom_cmo_inval_range, + .flush_range = &zicbom_cmo_flush_range, +}; + +static void zicbom_register_cmo_ops(void) +{ + riscv_noncoherent_register_cache_ops(&zicbom_cmo_ops); +} +#else +static void zicbom_register_cmo_ops(void) {} +#endif + void __init setup_arch(char **cmdline_p) { parse_dtb(); @@ -301,8 +346,10 @@ void __init setup_arch(char **cmdline_p) riscv_fill_hwcap(); apply_boot_alternatives(); if (IS_ENABLED(CONFIG_RISCV_ISA_ZICBOM) && - riscv_isa_extension_available(NULL, ZICBOM)) + riscv_isa_extension_available(NULL, ZICBOM)) { riscv_noncoherent_supported(); + zicbom_register_cmo_ops(); + } } static int __init topology_init(void) diff --git a/arch/riscv/mm/dma-noncoherent.c b/arch/riscv/mm/dma-noncoherent.c index b9a9f57e02be..ab8f6dc67914 100644 --- a/arch/riscv/mm/dma-noncoherent.c +++ b/arch/riscv/mm/dma-noncoherent.c @@ -9,28 +9,36 @@ #include <linux/dma-map-ops.h> #include <linux/mm.h> #include <asm/cacheflush.h> +#include <asm/dma-noncoherent.h> static bool noncoherent_supported; +struct riscv_cache_ops noncoherent_cache_ops = { + .clean_range = NULL, + .inv_range = NULL, + .flush_range = NULL, +}; +EXPORT_SYMBOL_GPL(noncoherent_cache_ops); + static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size) { void *vaddr = phys_to_virt(paddr); - ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size); + riscv_dma_noncoherent_clean(vaddr, size); } static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size) { void *vaddr = phys_to_virt(paddr); - ALT_CMO_OP(inval, vaddr, size, riscv_cbom_block_size); + riscv_dma_noncoherent_inval(vaddr, size); } static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size) { void *vaddr = phys_to_virt(paddr); - ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size); + riscv_dma_noncoherent_flush(vaddr, size); } static inline bool arch_sync_dma_clean_before_fromdevice(void) @@ -50,7 +58,7 @@ void arch_dma_prep_coherent(struct page *page, size_t size) { void *flush_addr = page_address(page); - ALT_CMO_OP(flush, flush_addr, size, riscv_cbom_block_size); + riscv_dma_noncoherent_flush(flush_addr, size); } void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, @@ -75,3 +83,12 @@ void riscv_noncoherent_supported(void) "Non-coherent DMA support enabled without a block size\n"); noncoherent_supported = true; } + +void riscv_noncoherent_register_cache_ops(const struct riscv_cache_ops *ops) +{ + if (!ops) + return; + + noncoherent_cache_ops = *ops; +} +EXPORT_SYMBOL_GPL(riscv_noncoherent_register_cache_ops); diff --git a/arch/riscv/mm/pmem.c b/arch/riscv/mm/pmem.c index 089df92ae876..aad7c2209eca 100644 --- a/arch/riscv/mm/pmem.c +++ b/arch/riscv/mm/pmem.c @@ -6,16 +6,16 @@ #include <linux/export.h> #include <linux/libnvdimm.h> -#include <asm/cacheflush.h> +#include <asm/dma-noncoherent.h> void arch_wb_cache_pmem(void *addr, size_t size) { - ALT_CMO_OP(clean, addr, size, riscv_cbom_block_size); + riscv_dma_noncoherent_pmem_clean(addr, size); } EXPORT_SYMBOL_GPL(arch_wb_cache_pmem); void arch_invalidate_pmem(void *addr, size_t size) { - ALT_CMO_OP(inval, addr, size, riscv_cbom_block_size); + riscv_dma_noncoherent_pmem_inval(addr, size); } EXPORT_SYMBOL_GPL(arch_invalidate_pmem); -- 2.25.1